mirror of
https://github.com/outline/outline.git
synced 2026-06-13 11:25:03 +03:00
perf: Remove turndown (#11331)
* Remove turndown * Refactor htmlToProsemirror * fix: Bug in CSV import * refactor
This commit is contained in:
@@ -75,7 +75,6 @@
|
||||
"@hocuspocus/extension-throttle": "1.1.2",
|
||||
"@hocuspocus/provider": "1.1.2",
|
||||
"@hocuspocus/server": "1.1.2",
|
||||
"@joplin/turndown-plugin-gfm": "^1.0.49",
|
||||
"@juggle/resize-observer": "^3.4.0",
|
||||
"@linear/sdk": "^58.1.0",
|
||||
"@node-oauth/oauth2-server": "^5.2.0",
|
||||
@@ -253,7 +252,6 @@
|
||||
"tiny-cookie": "^2.5.1",
|
||||
"tmp": "^0.2.5",
|
||||
"tunnel-agent": "^0.6.0",
|
||||
"turndown": "^7.2.2",
|
||||
"ukkonen": "^2.2.0",
|
||||
"umzug": "^3.8.2",
|
||||
"utility-types": "^3.11.0",
|
||||
@@ -341,7 +339,6 @@
|
||||
"@types/styled-components": "^5.1.32",
|
||||
"@types/throng": "^5.0.7",
|
||||
"@types/tmp": "^0.2.6",
|
||||
"@types/turndown": "^5.0.6",
|
||||
"@types/utf8": "^3.0.3",
|
||||
"@types/validator": "^13.15.3",
|
||||
"@types/yauzl": "^2.10.3",
|
||||
|
||||
@@ -1,16 +1,11 @@
|
||||
import emojiRegex from "emoji-regex";
|
||||
import mime from "mime-types";
|
||||
import type { Node } from "prosemirror-model";
|
||||
import truncate from "lodash/truncate";
|
||||
import parseTitle from "@shared/utils/parseTitle";
|
||||
import type { ProsemirrorData } from "@shared/types";
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { DocumentValidation } from "@shared/validations";
|
||||
import { serializer } from "@server/editor";
|
||||
import { traceFunction } from "@server/logging/tracing";
|
||||
import type { User } from "@server/models";
|
||||
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
|
||||
import { TextHelper } from "@server/models/helpers/TextHelper";
|
||||
import type { APIContext } from "@server/types";
|
||||
import { DocumentConverter } from "@server/utils/DocumentConverter";
|
||||
import { InvalidRequestError } from "../errors";
|
||||
@@ -33,14 +28,11 @@ type ImportResult = {
|
||||
/**
|
||||
* Converts document content to state and validates size constraints.
|
||||
*
|
||||
* @param content The document content as markdown text or Prosemirror JSON.
|
||||
* @param content The document content as Prosemirror JSON.
|
||||
* @param title The document title (used in error messages).
|
||||
* @returns The Y.Doc state buffer.
|
||||
*/
|
||||
function convertToState(
|
||||
content: string | ProsemirrorData,
|
||||
title: string
|
||||
): Buffer {
|
||||
function convertToState(content: ProsemirrorData, title: string): Buffer {
|
||||
const ydoc = ProsemirrorHelper.toYDoc(content);
|
||||
const state = ProsemirrorHelper.toState(ydoc);
|
||||
|
||||
@@ -53,92 +45,6 @@ function convertToState(
|
||||
return state;
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports HTML content directly to Prosemirror, bypassing markdown conversion.
|
||||
*/
|
||||
async function importHtml(
|
||||
content: Buffer | string,
|
||||
title: string,
|
||||
user: User,
|
||||
ctx: APIContext
|
||||
): Promise<ImportResult> {
|
||||
let doc: Node = DocumentConverter.htmlToProsemirror(content);
|
||||
|
||||
// Extract title from first H1 if present
|
||||
const headings = SharedProsemirrorHelper.getHeadings(doc);
|
||||
if (headings.length > 0 && headings[0].level === 1) {
|
||||
title = headings[0].title;
|
||||
doc = ProsemirrorHelper.removeFirstHeading(doc);
|
||||
}
|
||||
|
||||
// Extract emoji from start of document
|
||||
const { emoji: icon, doc: docWithoutEmoji } =
|
||||
ProsemirrorHelper.extractEmojiFromStart(doc);
|
||||
doc = docWithoutEmoji;
|
||||
|
||||
// Replace external images with attachments
|
||||
doc = await TextHelper.replaceImagesWithAttachmentsInNode(ctx, doc, user);
|
||||
|
||||
const text = serializer.serialize(doc);
|
||||
title = truncate(title, { length: DocumentValidation.maxTitleLength });
|
||||
const state = convertToState(doc.toJSON(), title);
|
||||
|
||||
return { text, state, title, icon };
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports content via markdown conversion (for docx, md, csv, etc.).
|
||||
*/
|
||||
async function importMarkdown(
|
||||
content: Buffer | string,
|
||||
fileName: string,
|
||||
mimeType: string,
|
||||
title: string,
|
||||
user: User,
|
||||
ctx: APIContext
|
||||
): Promise<ImportResult> {
|
||||
let text = await DocumentConverter.convertToMarkdown(
|
||||
content,
|
||||
fileName,
|
||||
mimeType
|
||||
);
|
||||
|
||||
// Find and extract emoji near the beginning of the document
|
||||
const regex = emojiRegex();
|
||||
const matches = regex.exec(text.slice(0, 10));
|
||||
const icon = matches ? matches[0] : undefined;
|
||||
if (icon) {
|
||||
text = text.replace(icon, "");
|
||||
}
|
||||
|
||||
// If the first line looks like a markdown heading, use it as the title
|
||||
if (text.startsWith("# ")) {
|
||||
const result = parseTitle(text);
|
||||
title = result.title;
|
||||
text = text.replace(/^.+(\n|$)/, "");
|
||||
}
|
||||
|
||||
// Replace any <br> generated by turndown with escaped newlines
|
||||
text = text.trim().replace(/<br>/gi, "\\n");
|
||||
|
||||
// Remove any closed and immediately reopened formatting marks
|
||||
text = text.replace(/\*\*\*\*/gi, "").replace(/____/gi, "");
|
||||
|
||||
text = await TextHelper.replaceImagesWithAttachments(ctx, text, user);
|
||||
|
||||
// Sanity check – text cannot possibly be longer than state
|
||||
if (text.length > DocumentValidation.maxStateLength) {
|
||||
throw InvalidRequestError(
|
||||
`The document "${title}" is too large to import, please reduce the length and try again`
|
||||
);
|
||||
}
|
||||
|
||||
title = truncate(title, { length: DocumentValidation.maxTitleLength });
|
||||
const state = convertToState(text, title);
|
||||
|
||||
return { text, state, title, icon };
|
||||
}
|
||||
|
||||
async function documentImporter({
|
||||
mimeType,
|
||||
fileName,
|
||||
@@ -154,18 +60,40 @@ async function documentImporter({
|
||||
"html",
|
||||
...(mime.extensions[mimeType] ?? []),
|
||||
];
|
||||
const title = fileName.replace(
|
||||
const fileTitle = fileName.replace(
|
||||
new RegExp(`\\.(${extensions.join("|")})$`, "i"),
|
||||
""
|
||||
);
|
||||
|
||||
const isHtml = mimeType === "text/html" || fileName.endsWith(".html");
|
||||
// Convert document using unified converter
|
||||
const {
|
||||
doc,
|
||||
title: extractedTitle,
|
||||
icon,
|
||||
} = await DocumentConverter.convert(content, fileName, mimeType);
|
||||
|
||||
if (isHtml) {
|
||||
return importHtml(content, title, user, ctx);
|
||||
// Use extracted title or fall back to filename
|
||||
let title = extractedTitle || fileTitle;
|
||||
|
||||
// Replace external images with attachments
|
||||
const processedDoc = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
// Serialize final text and handle empty documents
|
||||
let text = serializer.serialize(processedDoc).trim();
|
||||
// Empty paragraphs serialize to escaped newlines/backslashes, treat as empty
|
||||
if (/^[\\\s]*$/.test(text)) {
|
||||
text = "";
|
||||
}
|
||||
|
||||
return importMarkdown(content, fileName, mimeType, title, user, ctx);
|
||||
// Truncate title and validate size
|
||||
title = truncate(title, { length: DocumentValidation.maxTitleLength });
|
||||
const state = convertToState(processedDoc.toJSON() as ProsemirrorData, title);
|
||||
|
||||
return { text, state, title, icon };
|
||||
}
|
||||
|
||||
export default traceFunction({
|
||||
|
||||
@@ -2,10 +2,14 @@ import { faker } from "@faker-js/faker";
|
||||
import type { DeepPartial } from "utility-types";
|
||||
import type { ProsemirrorData } from "@shared/types";
|
||||
import { MentionType } from "@shared/types";
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { createContext } from "@server/context";
|
||||
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
|
||||
import type { MentionAttrs } from "./ProsemirrorHelper";
|
||||
import { ProsemirrorHelper } from "./ProsemirrorHelper";
|
||||
|
||||
jest.mock("@server/storage/files");
|
||||
|
||||
describe("ProsemirrorHelper", () => {
|
||||
describe("processMentions", () => {
|
||||
it("should handle deleted users", async () => {
|
||||
@@ -932,141 +936,166 @@ describe("ProsemirrorHelper", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("htmlToProsemirror", () => {
|
||||
it("should convert basic HTML to Prosemirror", () => {
|
||||
const html = "<p>Hello world</p>";
|
||||
describe("replaceImagesWithAttachments", () => {
|
||||
it("should return the same document when there are no images", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [{ type: "text", text: "No images here" }],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(doc.type.name).toBe("doc");
|
||||
expect(doc.content.childCount).toBe(1);
|
||||
expect(doc.content.child(0).type.name).toBe("paragraph");
|
||||
expect(doc.content.child(0).textContent).toBe("Hello world");
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
expect(result.toJSON()).toEqual(doc.toJSON());
|
||||
});
|
||||
|
||||
it("should convert HTML with heading", () => {
|
||||
const html = "<h1>Title</h1><p>Content</p>";
|
||||
it("should correctly identify images in a document", () => {
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
attrs: {
|
||||
src: "https://example.com/image.png",
|
||||
alt: "Test image",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.childCount).toBe(2);
|
||||
expect(doc.content.child(0).type.name).toBe("heading");
|
||||
expect(doc.content.child(0).attrs.level).toBe(1);
|
||||
expect(doc.content.child(0).textContent).toBe("Title");
|
||||
expect(doc.content.child(1).type.name).toBe("paragraph");
|
||||
const images = SharedProsemirrorHelper.getImages(doc);
|
||||
expect(images.length).toBe(1);
|
||||
expect(images[0].attrs.src).toBe("https://example.com/image.png");
|
||||
expect(images[0].attrs.alt).toBe("Test image");
|
||||
});
|
||||
|
||||
it("should remove script tags", () => {
|
||||
const html = "<p>Safe content</p><script>alert('xss')</script>";
|
||||
it("should skip images with invalid URLs", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
attrs: {
|
||||
src: "not-a-valid-url",
|
||||
alt: "Invalid",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(doc.textContent).toBe("Safe content");
|
||||
expect(doc.textContent).not.toContain("alert");
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
// Document should remain unchanged since URL is invalid
|
||||
expect(result.toJSON()).toEqual(doc.toJSON());
|
||||
});
|
||||
|
||||
it("should remove style tags", () => {
|
||||
const html = "<style>body { color: red; }</style><p>Content</p>";
|
||||
it("should skip images with internal URLs", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
attrs: {
|
||||
src: "/api/attachments.redirect?id=existing-id",
|
||||
alt: "Internal",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(doc.textContent).toBe("Content");
|
||||
expect(doc.textContent).not.toContain("color");
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
// Document should remain unchanged since URL is internal
|
||||
expect(result.toJSON()).toEqual(doc.toJSON());
|
||||
});
|
||||
|
||||
it("should handle Buffer input", () => {
|
||||
const html = Buffer.from("<p>From buffer</p>", "utf8");
|
||||
it("should handle document with multiple node types", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "heading",
|
||||
attrs: { level: 1 },
|
||||
content: [{ type: "text", text: "Title" }],
|
||||
},
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [{ type: "text", text: "Some text" }],
|
||||
},
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
attrs: {
|
||||
src: "invalid-url",
|
||||
alt: "Image",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(doc.content.child(0).textContent).toBe("From buffer");
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
// Document structure should be preserved
|
||||
expect(result.content.childCount).toBe(3);
|
||||
expect(result.content.child(0).type.name).toBe("heading");
|
||||
expect(result.content.child(1).type.name).toBe("paragraph");
|
||||
expect(result.content.child(2).type.name).toBe("paragraph");
|
||||
});
|
||||
|
||||
it("should convert HTML with lists", () => {
|
||||
const html = "<ul><li>Item 1</li><li>Item 2</li></ul>";
|
||||
it("should handle empty document", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
const doc = buildProseMirrorDoc([
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(doc.content.childCount).toBe(1);
|
||||
expect(doc.content.child(0).type.name).toBe("bullet_list");
|
||||
expect(doc.content.child(0).content.childCount).toBe(2);
|
||||
});
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
);
|
||||
|
||||
it("should convert HTML with bold and italic", () => {
|
||||
const html = "<p><strong>Bold</strong> and <em>italic</em></p>";
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
const paragraph = doc.content.child(0);
|
||||
expect(paragraph.type.name).toBe("paragraph");
|
||||
|
||||
// Check that marks are applied
|
||||
const boldText = paragraph.content.child(0);
|
||||
expect(boldText.text).toBe("Bold");
|
||||
expect(boldText.marks.some((m) => m.type.name === "strong")).toBe(true);
|
||||
|
||||
const italicText = paragraph.content.child(2);
|
||||
expect(italicText.text).toBe("italic");
|
||||
expect(italicText.marks.some((m) => m.type.name === "em")).toBe(true);
|
||||
});
|
||||
|
||||
it("should handle full HTML document", () => {
|
||||
const html = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test</title>
|
||||
<meta charset="utf-8">
|
||||
</head>
|
||||
<body>
|
||||
<h1>Document Title</h1>
|
||||
<p>Paragraph content</p>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.childCount).toBe(2);
|
||||
expect(doc.content.child(0).type.name).toBe("heading");
|
||||
expect(doc.content.child(0).textContent).toBe("Document Title");
|
||||
expect(doc.content.child(1).type.name).toBe("paragraph");
|
||||
expect(doc.content.child(1).textContent).toBe("Paragraph content");
|
||||
});
|
||||
|
||||
it("should remove emoticon images", () => {
|
||||
const html = `<p>Hello <img class="emoticon" src="smile.png" alt=":)"> world</p>`;
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
// Emoticon image should be removed, text content remains
|
||||
expect(doc.textContent).not.toContain(":)");
|
||||
expect(doc.textContent).toContain("Hello");
|
||||
expect(doc.textContent).toContain("world");
|
||||
});
|
||||
|
||||
it("should remove Jira icon images", () => {
|
||||
const html = `
|
||||
<p>Issue: <span class="jira-issue-key"><img class="icon" src="icon.png">ABC-123</span></p>
|
||||
`;
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.textContent).toBe("Issue: ABC-123");
|
||||
});
|
||||
|
||||
it("should apply Confluence image sizing", () => {
|
||||
const html = `
|
||||
<p><img src="image.png" data-width="800" data-height="600" width="400"></p>
|
||||
`;
|
||||
|
||||
const doc = ProsemirrorHelper.htmlToProsemirror(html);
|
||||
|
||||
const paragraph = doc.content.child(0);
|
||||
const image = paragraph.content.child(0);
|
||||
expect(image.type.name).toBe("image");
|
||||
expect(image.attrs.width).toBe(400);
|
||||
expect(image.attrs.height).toBe(300);
|
||||
expect(result.toJSON()).toEqual(doc.toJSON());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
import emojiRegex from "emoji-regex";
|
||||
import { JSDOM } from "jsdom";
|
||||
import chunk from "lodash/chunk";
|
||||
import compact from "lodash/compact";
|
||||
import { EditorState } from "prosemirror-state";
|
||||
import { EditorView } from "prosemirror-view";
|
||||
import flatten from "lodash/flatten";
|
||||
import isMatch from "lodash/isMatch";
|
||||
import uniq from "lodash/uniq";
|
||||
import {
|
||||
Node,
|
||||
Fragment,
|
||||
DOMParser as ProsemirrorDOMParser,
|
||||
} from "prosemirror-model";
|
||||
import { Node, Fragment } from "prosemirror-model";
|
||||
import { renderToString } from "react-dom/server";
|
||||
import styled, { ServerStyleSheet, ThemeProvider } from "styled-components";
|
||||
import { prosemirrorToYDoc } from "y-prosemirror";
|
||||
@@ -22,17 +19,23 @@ import EditorContainer from "@shared/editor/components/Styles";
|
||||
import GlobalStyles from "@shared/styles/globals";
|
||||
import light from "@shared/styles/theme";
|
||||
import type { ProsemirrorData, UnfurlResponse } from "@shared/types";
|
||||
import { MentionType } from "@shared/types";
|
||||
import { attachmentRedirectRegex } from "@shared/utils/ProsemirrorHelper";
|
||||
import { AttachmentPreset, MentionType } from "@shared/types";
|
||||
import {
|
||||
attachmentRedirectRegex,
|
||||
ProsemirrorHelper as SharedProsemirrorHelper,
|
||||
} from "@shared/utils/ProsemirrorHelper";
|
||||
import parseDocumentSlug from "@shared/utils/parseDocumentSlug";
|
||||
import { isRTL } from "@shared/utils/rtl";
|
||||
import { isInternalUrl } from "@shared/utils/urls";
|
||||
import attachmentCreator from "@server/commands/attachmentCreator";
|
||||
import { plugins, schema, parser } from "@server/editor";
|
||||
import env from "@server/env";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { trace } from "@server/logging/tracing";
|
||||
import Attachment from "@server/models/Attachment";
|
||||
import User from "@server/models/User";
|
||||
import FileStorage from "@server/storage/files";
|
||||
import type { APIContext } from "@server/types";
|
||||
|
||||
export type HTMLOptions = {
|
||||
/** A title, if it should be included */
|
||||
@@ -798,88 +801,14 @@ export class ProsemirrorHelper {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert HTML content directly to a Prosemirror document node.
|
||||
*
|
||||
* @param content The HTML content as a string or Buffer.
|
||||
* @returns A Prosemirror Node representing the document.
|
||||
*/
|
||||
public static htmlToProsemirror(content: Buffer | string): Node {
|
||||
if (typeof content !== "string") {
|
||||
content = content.toString("utf8");
|
||||
}
|
||||
|
||||
const dom = new JSDOM(content);
|
||||
const document = dom.window.document;
|
||||
|
||||
// Remove problematic elements before parsing
|
||||
const elementsToRemove = document.querySelectorAll(
|
||||
"script, style, title, head, meta, link"
|
||||
);
|
||||
elementsToRemove.forEach((el) => el.remove());
|
||||
|
||||
// Preprocess the DOM to handle cases that turndown plugins handled
|
||||
this.preprocessHtmlForImport(document);
|
||||
|
||||
// Patch global environment for Prosemirror DOMParser
|
||||
const cleanup = this.patchGlobalEnv(dom.window);
|
||||
|
||||
try {
|
||||
const domParser = ProsemirrorDOMParser.fromSchema(schema);
|
||||
return domParser.parse(document.body);
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Preprocesses HTML DOM before Prosemirror parsing to cleanup
|
||||
* images and other elements.
|
||||
*
|
||||
* @param document The DOM document to preprocess.
|
||||
*/
|
||||
private static preprocessHtmlForImport(document: Document): void {
|
||||
// Handle images: filter emoticons, remove Jira icons, apply Confluence sizing
|
||||
const images = document.querySelectorAll("img");
|
||||
images.forEach((img) => {
|
||||
const className = img.className || "";
|
||||
|
||||
// Skip emoticon images (they'll be dropped)
|
||||
if (className.includes("emoticon")) {
|
||||
img.remove();
|
||||
return;
|
||||
}
|
||||
|
||||
// Remove Jira icon images
|
||||
if (
|
||||
className === "icon" &&
|
||||
img.parentElement?.className.includes("jira-issue-key")
|
||||
) {
|
||||
img.remove();
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle Confluence image sizing: data-width/data-height → width/height
|
||||
const dataWidth = img.getAttribute("data-width");
|
||||
const dataHeight = img.getAttribute("data-height");
|
||||
const width = img.getAttribute("width");
|
||||
|
||||
if (dataWidth && dataHeight && width) {
|
||||
const ratio = parseInt(dataWidth) / parseInt(width);
|
||||
const calculatedHeight = Math.round(parseInt(dataHeight) / ratio);
|
||||
img.setAttribute("height", String(calculatedHeight));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Patches the global environment with properties from the JSDOM window,
|
||||
* necessary for ProseMirror to run in a Node environment.
|
||||
*
|
||||
* @param domWindow The JSDOM window object
|
||||
* @returns A cleanup function to restore the global environment
|
||||
* @param domWindow The JSDOM window object.
|
||||
* @returns A cleanup function to restore the global environment.
|
||||
*/
|
||||
private static patchGlobalEnv(domWindow: JSDOM["window"]) {
|
||||
public static patchGlobalEnv(domWindow: JSDOM["window"]) {
|
||||
const g = global as any;
|
||||
|
||||
const globalParams = {
|
||||
@@ -922,4 +851,109 @@ export class ProsemirrorHelper {
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces remote and base64 encoded images in the given Prosemirror node
|
||||
* with attachment urls and uploads the images to the storage provider.
|
||||
*
|
||||
* @param ctx The API context.
|
||||
* @param doc The Prosemirror node to process.
|
||||
* @param user The user context.
|
||||
* @returns A new Prosemirror node with images replaced.
|
||||
*/
|
||||
static async replaceImagesWithAttachments(
|
||||
ctx: APIContext,
|
||||
doc: Node,
|
||||
user: User
|
||||
): Promise<Node> {
|
||||
const images = SharedProsemirrorHelper.getImages(doc);
|
||||
const videos = SharedProsemirrorHelper.getVideos(doc);
|
||||
const nodes = [...images, ...videos];
|
||||
|
||||
if (!nodes.length) {
|
||||
return doc;
|
||||
}
|
||||
|
||||
const timeoutPerImage = Math.floor(
|
||||
Math.min(env.REQUEST_TIMEOUT / nodes.length, 10000)
|
||||
);
|
||||
|
||||
const urlToAttachment: Map<string, Attachment> = new Map();
|
||||
const chunks = chunk(nodes, 10);
|
||||
|
||||
for (const nodeChunk of chunks) {
|
||||
await Promise.all(
|
||||
nodeChunk.map(async (node) => {
|
||||
const src = String(node.attrs.src ?? "");
|
||||
|
||||
// Skip invalid URLs
|
||||
try {
|
||||
new URL(src);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip internal URLs
|
||||
if (isInternalUrl(src)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip already processed
|
||||
if (urlToAttachment.has(src)) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const attachment = await attachmentCreator({
|
||||
name: String(node.attrs.alt ?? node.type.name),
|
||||
url: src,
|
||||
preset: AttachmentPreset.DocumentAttachment,
|
||||
user,
|
||||
fetchOptions: {
|
||||
timeout: timeoutPerImage,
|
||||
},
|
||||
ctx,
|
||||
});
|
||||
|
||||
if (attachment) {
|
||||
urlToAttachment.set(src, attachment);
|
||||
}
|
||||
} catch (err) {
|
||||
Logger.warn("Failed to download image for attachment", {
|
||||
error: err.message,
|
||||
src,
|
||||
});
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Transform the document to replace image/video src attributes
|
||||
const transformFragment = (fragment: Fragment): Fragment => {
|
||||
const transformedNodes: Node[] = [];
|
||||
|
||||
fragment.forEach((node) => {
|
||||
if (node.type.name === "image" || node.type.name === "video") {
|
||||
const src = String(node.attrs.src ?? "");
|
||||
const attachment = urlToAttachment.get(src);
|
||||
|
||||
if (attachment) {
|
||||
const json = node.toJSON();
|
||||
json.attrs = { ...json.attrs, src: attachment.redirectUrl };
|
||||
transformedNodes.push(Node.fromJSON(schema, json));
|
||||
} else {
|
||||
transformedNodes.push(node);
|
||||
}
|
||||
} else if (node.content.size > 0) {
|
||||
transformedNodes.push(node.copy(transformFragment(node.content)));
|
||||
} else {
|
||||
transformedNodes.push(node);
|
||||
}
|
||||
});
|
||||
|
||||
return Fragment.fromArray(transformedNodes);
|
||||
};
|
||||
|
||||
return doc.copy(transformFragment(doc.content));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { createContext } from "@server/context";
|
||||
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
|
||||
import { TextHelper } from "./TextHelper";
|
||||
import { ProsemirrorHelper } from "./ProsemirrorHelper";
|
||||
|
||||
jest.mock("@server/storage/files");
|
||||
|
||||
describe("TextHelper", () => {
|
||||
describe("replaceImagesWithAttachmentsInNode", () => {
|
||||
describe("ProsemirrorHelper", () => {
|
||||
describe("replaceImagesWithAttachments", () => {
|
||||
it("should return the same document when there are no images", async () => {
|
||||
const user = await buildUser();
|
||||
const ctx = createContext({ user });
|
||||
@@ -18,7 +18,7 @@ describe("TextHelper", () => {
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await TextHelper.replaceImagesWithAttachmentsInNode(
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
@@ -68,7 +68,7 @@ describe("TextHelper", () => {
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await TextHelper.replaceImagesWithAttachmentsInNode(
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
@@ -97,7 +97,7 @@ describe("TextHelper", () => {
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await TextHelper.replaceImagesWithAttachmentsInNode(
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
@@ -135,7 +135,7 @@ describe("TextHelper", () => {
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await TextHelper.replaceImagesWithAttachmentsInNode(
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
@@ -159,7 +159,7 @@ describe("TextHelper", () => {
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await TextHelper.replaceImagesWithAttachmentsInNode(
|
||||
const result = await ProsemirrorHelper.replaceImagesWithAttachments(
|
||||
ctx,
|
||||
doc,
|
||||
user
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import chunk from "lodash/chunk";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import { Fragment, Node } from "prosemirror-model";
|
||||
import { AttachmentPreset } from "@shared/types";
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { isInternalUrl } from "@shared/utils/urls";
|
||||
import attachmentCreator from "@server/commands/attachmentCreator";
|
||||
import { schema } from "@server/editor";
|
||||
import env from "@server/env";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { trace } from "@server/logging/tracing";
|
||||
@@ -14,7 +12,6 @@ import FileStorage from "@server/storage/files";
|
||||
import type { APIContext } from "@server/types";
|
||||
import parseAttachmentIds from "@server/utils/parseAttachmentIds";
|
||||
import parseImages from "@server/utils/parseImages";
|
||||
import { isInternalUrl } from "@shared/utils/urls";
|
||||
|
||||
@trace()
|
||||
export class TextHelper {
|
||||
@@ -131,109 +128,4 @@ export class TextHelper {
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces remote and base64 encoded images in the given Prosemirror node
|
||||
* with attachment urls and uploads the images to the storage provider.
|
||||
*
|
||||
* @param ctx The API context.
|
||||
* @param doc The Prosemirror node to process.
|
||||
* @param user The user context.
|
||||
* @returns A new Prosemirror node with images replaced.
|
||||
*/
|
||||
static async replaceImagesWithAttachmentsInNode(
|
||||
ctx: APIContext,
|
||||
doc: Node,
|
||||
user: User
|
||||
): Promise<Node> {
|
||||
const images = SharedProsemirrorHelper.getImages(doc);
|
||||
const videos = SharedProsemirrorHelper.getVideos(doc);
|
||||
const nodes = [...images, ...videos];
|
||||
|
||||
if (!nodes.length) {
|
||||
return doc;
|
||||
}
|
||||
|
||||
const timeoutPerImage = Math.floor(
|
||||
Math.min(env.REQUEST_TIMEOUT / nodes.length, 10000)
|
||||
);
|
||||
|
||||
const urlToAttachment: Map<string, Attachment> = new Map();
|
||||
const chunks = chunk(nodes, 10);
|
||||
|
||||
for (const nodeChunk of chunks) {
|
||||
await Promise.all(
|
||||
nodeChunk.map(async (node) => {
|
||||
const src = String(node.attrs.src ?? "");
|
||||
|
||||
// Skip invalid URLs
|
||||
try {
|
||||
new URL(src);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip internal URLs
|
||||
if (isInternalUrl(src)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip already processed
|
||||
if (urlToAttachment.has(src)) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const attachment = await attachmentCreator({
|
||||
name: String(node.attrs.alt ?? node.type.name),
|
||||
url: src,
|
||||
preset: AttachmentPreset.DocumentAttachment,
|
||||
user,
|
||||
fetchOptions: {
|
||||
timeout: timeoutPerImage,
|
||||
},
|
||||
ctx,
|
||||
});
|
||||
|
||||
if (attachment) {
|
||||
urlToAttachment.set(src, attachment);
|
||||
}
|
||||
} catch (err) {
|
||||
Logger.warn("Failed to download image for attachment", {
|
||||
error: err.message,
|
||||
src,
|
||||
});
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Transform the document to replace image/video src attributes
|
||||
const transformFragment = (fragment: Fragment): Fragment => {
|
||||
const transformedNodes: Node[] = [];
|
||||
|
||||
fragment.forEach((node) => {
|
||||
if (node.type.name === "image" || node.type.name === "video") {
|
||||
const src = String(node.attrs.src ?? "");
|
||||
const attachment = urlToAttachment.get(src);
|
||||
|
||||
if (attachment) {
|
||||
const json = node.toJSON();
|
||||
json.attrs = { ...json.attrs, src: attachment.redirectUrl };
|
||||
transformedNodes.push(Node.fromJSON(schema, json));
|
||||
} else {
|
||||
transformedNodes.push(node);
|
||||
}
|
||||
} else if (node.content.size > 0) {
|
||||
transformedNodes.push(node.copy(transformFragment(node.content)));
|
||||
} else {
|
||||
transformedNodes.push(node);
|
||||
}
|
||||
});
|
||||
|
||||
return Fragment.fromArray(transformedNodes);
|
||||
};
|
||||
|
||||
return doc.copy(transformFragment(doc.content));
|
||||
}
|
||||
}
|
||||
|
||||
Vendored
-9
@@ -11,15 +11,6 @@ declare module "email-providers" {
|
||||
export default list;
|
||||
}
|
||||
|
||||
declare module "@joplin/turndown-plugin-gfm" {
|
||||
import { Plugin } from "turndown";
|
||||
|
||||
export const strikethrough: Plugin;
|
||||
export const tables: Plugin;
|
||||
export const taskListItems: Plugin;
|
||||
export const gfm: Plugin;
|
||||
}
|
||||
|
||||
declare module "ukkonen" {
|
||||
export default function ukkonen(
|
||||
first: string,
|
||||
|
||||
@@ -1,31 +1,291 @@
|
||||
import { DocumentConverter } from "./DocumentConverter";
|
||||
|
||||
describe("csvToMarkdown", () => {
|
||||
it("should convert csv to markdown with comma", async () => {
|
||||
const csv = `name,age
|
||||
describe("DocumentConverter", () => {
|
||||
describe("convert", () => {
|
||||
describe("csv", () => {
|
||||
it("should convert csv to markdown table", async () => {
|
||||
const csv = `name,age
|
||||
John,25
|
||||
Jane,24`;
|
||||
|
||||
const markdown = `| name | age |
|
||||
| --- | --- |
|
||||
| John | 25 |
|
||||
| Jane | 24 |
|
||||
`;
|
||||
const result = await DocumentConverter.convert(
|
||||
csv,
|
||||
"test.csv",
|
||||
"text/csv"
|
||||
);
|
||||
|
||||
expect(await DocumentConverter.csvToMarkdown(csv)).toEqual(markdown);
|
||||
});
|
||||
// CSV is converted to a markdown table
|
||||
expect(result.text).toContain("| name | age |");
|
||||
expect(result.text).toContain("John");
|
||||
expect(result.text).toContain("Jane");
|
||||
expect(result.title).toEqual("");
|
||||
});
|
||||
|
||||
it("should convert csv to markdown with semicolon", async () => {
|
||||
const csv = `name;age
|
||||
it("should handle csv with semicolon delimiter", async () => {
|
||||
const csv = `name;age
|
||||
John;25
|
||||
"Joan ""the bone"", Anne";24`;
|
||||
|
||||
const markdown = `| name | age |
|
||||
| --- | --- |
|
||||
| John | 25 |
|
||||
| Joan "the bone", Anne | 24 |
|
||||
`;
|
||||
const result = await DocumentConverter.convert(
|
||||
csv,
|
||||
"test.csv",
|
||||
"text/csv"
|
||||
);
|
||||
|
||||
expect(await DocumentConverter.csvToMarkdown(csv)).toEqual(markdown);
|
||||
expect(result.text).toContain("| name | age |");
|
||||
expect(result.text).toContain("John");
|
||||
expect(result.text).toContain('Joan "the bone", Anne');
|
||||
});
|
||||
|
||||
it("should handle csv with title row before headers", async () => {
|
||||
// Some financial exports have a title row before the actual headers
|
||||
const csv = `"Report for Account"
|
||||
|
||||
"Symbol","Name","Value",
|
||||
"ABC","Test Corp","$100",
|
||||
"XYZ","Other Inc","$200",`;
|
||||
|
||||
const result = await DocumentConverter.convert(
|
||||
csv,
|
||||
"test.csv",
|
||||
"text/csv"
|
||||
);
|
||||
|
||||
// The actual data headers should be used, not the title row
|
||||
expect(result.text).toContain("| Symbol | Name | Value |");
|
||||
expect(result.text).toContain("ABC");
|
||||
expect(result.text).toContain("Test Corp");
|
||||
expect(result.text).toContain("XYZ");
|
||||
});
|
||||
|
||||
it("should handle csv with trailing comma on each line", async () => {
|
||||
const csv = `name,age,city,
|
||||
John,25,NYC,
|
||||
Jane,24,LA,`;
|
||||
|
||||
const result = await DocumentConverter.convert(
|
||||
csv,
|
||||
"test.csv",
|
||||
"text/csv"
|
||||
);
|
||||
|
||||
expect(result.text).toContain("| name | age | city |");
|
||||
expect(result.text).toContain("John");
|
||||
expect(result.text).toContain("Jane");
|
||||
// Should not have trailing empty column
|
||||
expect(result.text).not.toContain("| city | |");
|
||||
expect(result.text).not.toContain("| city | |");
|
||||
});
|
||||
|
||||
it("should preserve intentionally empty cells at end of rows", async () => {
|
||||
const csv = `name,age,city
|
||||
John,25,NYC
|
||||
Jane,24,`;
|
||||
|
||||
const result = await DocumentConverter.convert(
|
||||
csv,
|
||||
"test.csv",
|
||||
"text/csv"
|
||||
);
|
||||
|
||||
expect(result.text).toContain("| name | age | city |");
|
||||
expect(result.text).toContain("John");
|
||||
expect(result.text).toContain("NYC");
|
||||
// Jane's row should have 3 columns (empty city preserved)
|
||||
expect(result.text).toMatch(/\| Jane \| 24\s*\|\s*\|/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("html", () => {
|
||||
it("should extract title from H1", async () => {
|
||||
const html = "<h1>My Title</h1><p>Content here</p>";
|
||||
const result = await DocumentConverter.convert(
|
||||
html,
|
||||
"test.html",
|
||||
"text/html"
|
||||
);
|
||||
|
||||
expect(result.title).toEqual("My Title");
|
||||
expect(result.text).toContain("Content here");
|
||||
expect(result.text).not.toContain("My Title");
|
||||
});
|
||||
|
||||
it("should extract emoji from start", async () => {
|
||||
const html = "<p>🚀 Launch content</p>";
|
||||
const result = await DocumentConverter.convert(
|
||||
html,
|
||||
"test.html",
|
||||
"text/html"
|
||||
);
|
||||
|
||||
expect(result.icon).toEqual("🚀");
|
||||
expect(result.text).not.toMatch(/^🚀/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("markdown", () => {
|
||||
it("should extract title from H1", async () => {
|
||||
const md = "# My Title\n\nContent here";
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
expect(result.title).toEqual("My Title");
|
||||
expect(result.text).toContain("Content here");
|
||||
expect(result.text).not.toContain("My Title");
|
||||
});
|
||||
|
||||
it("should return empty title when no H1", async () => {
|
||||
const md = "## Subtitle\n\nContent here";
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
expect(result.title).toEqual("");
|
||||
expect(result.text).toContain("Subtitle");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("htmlToProsemirror", () => {
|
||||
it("should convert basic HTML to Prosemirror", () => {
|
||||
const html = "<p>Hello world</p>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.type.name).toBe("doc");
|
||||
expect(doc.content.childCount).toBe(1);
|
||||
expect(doc.content.child(0).type.name).toBe("paragraph");
|
||||
expect(doc.content.child(0).textContent).toBe("Hello world");
|
||||
});
|
||||
|
||||
it("should convert HTML with heading", () => {
|
||||
const html = "<h1>Title</h1><p>Content</p>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.childCount).toBe(2);
|
||||
expect(doc.content.child(0).type.name).toBe("heading");
|
||||
expect(doc.content.child(0).attrs.level).toBe(1);
|
||||
expect(doc.content.child(0).textContent).toBe("Title");
|
||||
expect(doc.content.child(1).type.name).toBe("paragraph");
|
||||
});
|
||||
|
||||
it("should remove script tags", () => {
|
||||
const html = "<p>Safe content</p><script>alert('xss')</script>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.textContent).toBe("Safe content");
|
||||
expect(doc.textContent).not.toContain("alert");
|
||||
});
|
||||
|
||||
it("should remove style tags", () => {
|
||||
const html = "<style>body { color: red; }</style><p>Content</p>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.textContent).toBe("Content");
|
||||
expect(doc.textContent).not.toContain("color");
|
||||
});
|
||||
|
||||
it("should handle Buffer input", () => {
|
||||
const html = Buffer.from("<p>From buffer</p>", "utf8");
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.child(0).textContent).toBe("From buffer");
|
||||
});
|
||||
|
||||
it("should convert HTML with lists", () => {
|
||||
const html = "<ul><li>Item 1</li><li>Item 2</li></ul>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.childCount).toBe(1);
|
||||
expect(doc.content.child(0).type.name).toBe("bullet_list");
|
||||
expect(doc.content.child(0).content.childCount).toBe(2);
|
||||
});
|
||||
|
||||
it("should convert HTML with bold and italic", () => {
|
||||
const html = "<p><strong>Bold</strong> and <em>italic</em></p>";
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
const paragraph = doc.content.child(0);
|
||||
expect(paragraph.type.name).toBe("paragraph");
|
||||
|
||||
// Check that marks are applied
|
||||
const boldText = paragraph.content.child(0);
|
||||
expect(boldText.text).toBe("Bold");
|
||||
expect(boldText.marks.some((m) => m.type.name === "strong")).toBe(true);
|
||||
|
||||
const italicText = paragraph.content.child(2);
|
||||
expect(italicText.text).toBe("italic");
|
||||
expect(italicText.marks.some((m) => m.type.name === "em")).toBe(true);
|
||||
});
|
||||
|
||||
it("should handle full HTML document", () => {
|
||||
const html = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test</title>
|
||||
<meta charset="utf-8">
|
||||
</head>
|
||||
<body>
|
||||
<h1>Document Title</h1>
|
||||
<p>Paragraph content</p>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.content.childCount).toBe(2);
|
||||
expect(doc.content.child(0).type.name).toBe("heading");
|
||||
expect(doc.content.child(0).textContent).toBe("Document Title");
|
||||
expect(doc.content.child(1).type.name).toBe("paragraph");
|
||||
expect(doc.content.child(1).textContent).toBe("Paragraph content");
|
||||
});
|
||||
|
||||
it("should remove emoticon images", () => {
|
||||
const html = `<p>Hello <img class="emoticon" src="smile.png" alt=":)"> world</p>`;
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
// Emoticon image should be removed, text content remains
|
||||
expect(doc.textContent).not.toContain(":)");
|
||||
expect(doc.textContent).toContain("Hello");
|
||||
expect(doc.textContent).toContain("world");
|
||||
});
|
||||
|
||||
it("should remove Jira icon images", () => {
|
||||
const html = `
|
||||
<p>Issue: <span class="jira-issue-key"><img class="icon" src="icon.png">ABC-123</span></p>
|
||||
`;
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
expect(doc.textContent).toBe("Issue: ABC-123");
|
||||
});
|
||||
|
||||
it("should apply Confluence image sizing", () => {
|
||||
const html = `
|
||||
<p><img src="image.png" data-width="800" data-height="600" width="400"></p>
|
||||
`;
|
||||
|
||||
const doc = DocumentConverter.htmlToProsemirror(html);
|
||||
|
||||
const paragraph = doc.content.child(0);
|
||||
const image = paragraph.content.child(0);
|
||||
expect(image.type.name).toBe("image");
|
||||
expect(image.attrs.width).toBe(400);
|
||||
expect(image.attrs.height).toBe(300);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
+333
-117
@@ -1,89 +1,79 @@
|
||||
import { parse } from "@fast-csv/parse";
|
||||
import { JSDOM } from "jsdom";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import { simpleParser } from "mailparser";
|
||||
import mammoth from "mammoth";
|
||||
import type { Node } from "prosemirror-model";
|
||||
import { DOMParser as ProsemirrorDOMParser } from "prosemirror-model";
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { schema, serializer } from "@server/editor";
|
||||
import { FileImportError } from "@server/errors";
|
||||
import { trace, traceFunction } from "@server/logging/tracing";
|
||||
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
|
||||
import turndownService from "@server/utils/turndown";
|
||||
|
||||
export interface ConvertResult {
|
||||
/** The document content as markdown text. */
|
||||
text: string;
|
||||
/** The document content as Prosemirror. */
|
||||
doc: Node;
|
||||
/** The extracted title (from H1 heading if present). */
|
||||
title: string;
|
||||
/** The extracted emoji/icon from start of document. */
|
||||
icon?: string;
|
||||
}
|
||||
|
||||
@trace()
|
||||
export class DocumentConverter {
|
||||
/**
|
||||
* Convert an incoming file to markdown.
|
||||
* Convert an incoming file to a structured document result.
|
||||
*
|
||||
* @param content The content of the file.
|
||||
* @param fileName The name of the file, including extension.
|
||||
* @param mimeType The mime type of the file.
|
||||
* @returns The markdown representation of the file.
|
||||
* @returns The converted document with text, doc, title, and icon.
|
||||
*/
|
||||
public static async convertToMarkdown(
|
||||
public static async convert(
|
||||
content: Buffer | string,
|
||||
fileName: string,
|
||||
mimeType: string
|
||||
) {
|
||||
return (
|
||||
await this.internalConvertToMarkdown(content, fileName, mimeType)
|
||||
).trim();
|
||||
}
|
||||
): Promise<ConvertResult> {
|
||||
let doc: Node;
|
||||
|
||||
private static async internalConvertToMarkdown(
|
||||
content: Buffer | string,
|
||||
fileName: string,
|
||||
mimeType: string
|
||||
) {
|
||||
// First try to convert the file based on the mime type.
|
||||
switch (mimeType) {
|
||||
case "application/msword":
|
||||
return this.confluenceToMarkdown(content);
|
||||
case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
||||
return this.docXToMarkdown(content);
|
||||
case "text/html":
|
||||
return this.htmlToMarkdown(content);
|
||||
case "text/plain":
|
||||
case "text/markdown":
|
||||
return this.fileToMarkdown(content);
|
||||
case "text/csv":
|
||||
return this.csvToMarkdown(content);
|
||||
default:
|
||||
break;
|
||||
// Route to appropriate conversion method
|
||||
const html = await this.convertToHtml(content, fileName, mimeType);
|
||||
if (html !== undefined) {
|
||||
doc = this.htmlToProsemirror(html);
|
||||
} else {
|
||||
const markdown = await this.convertToMarkdown(
|
||||
content,
|
||||
fileName,
|
||||
mimeType
|
||||
);
|
||||
doc = ProsemirrorHelper.toProsemirror(markdown);
|
||||
}
|
||||
|
||||
// If the mime type doesn't work, try to convert based on the file extension.
|
||||
const extension = fileName.split(".").pop();
|
||||
switch (extension) {
|
||||
case "docx":
|
||||
return this.docXToMarkdown(content);
|
||||
case "html":
|
||||
return this.htmlToMarkdown(content);
|
||||
case "md":
|
||||
case "markdown":
|
||||
return this.fileToMarkdown(content);
|
||||
default:
|
||||
throw FileImportError(`File type ${mimeType} not supported`);
|
||||
}
|
||||
}
|
||||
|
||||
public static async docXToMarkdown(content: Buffer | string) {
|
||||
if (content instanceof Buffer) {
|
||||
const { value } = await traceFunction({ spanName: "convertToHtml" })(
|
||||
mammoth.convertToHtml
|
||||
)({
|
||||
buffer: content,
|
||||
});
|
||||
|
||||
return turndownService.turndown(value);
|
||||
// Extract title from first H1 heading
|
||||
let title = "";
|
||||
const headings = SharedProsemirrorHelper.getHeadings(doc);
|
||||
if (headings.length > 0 && headings[0].level === 1) {
|
||||
title = headings[0].title;
|
||||
doc = ProsemirrorHelper.removeFirstHeading(doc);
|
||||
}
|
||||
|
||||
throw FileImportError("Unsupported Word file");
|
||||
}
|
||||
// Extract emoji from start of document
|
||||
const { emoji: icon, doc: docWithoutEmoji } =
|
||||
ProsemirrorHelper.extractEmojiFromStart(doc);
|
||||
doc = docWithoutEmoji;
|
||||
|
||||
public static async htmlToMarkdown(content: Buffer | string) {
|
||||
if (typeof content !== "string") {
|
||||
content = content.toString("utf8");
|
||||
}
|
||||
// Serialize to markdown and trim whitespace
|
||||
const text = serializer.serialize(doc).trim();
|
||||
|
||||
return turndownService.turndown(content);
|
||||
return {
|
||||
text,
|
||||
doc,
|
||||
title,
|
||||
icon,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -93,19 +83,231 @@ export class DocumentConverter {
|
||||
* @returns A Prosemirror Node representing the document.
|
||||
*/
|
||||
public static htmlToProsemirror(content: Buffer | string): Node {
|
||||
return ProsemirrorHelper.htmlToProsemirror(content);
|
||||
if (typeof content !== "string") {
|
||||
content = content.toString("utf8");
|
||||
}
|
||||
|
||||
const dom = new JSDOM(content);
|
||||
const document = dom.window.document;
|
||||
|
||||
// Remove problematic elements before parsing
|
||||
const elementsToRemove = document.querySelectorAll(
|
||||
"script, style, title, head, meta, link"
|
||||
);
|
||||
elementsToRemove.forEach((el) => el.remove());
|
||||
|
||||
// Preprocess the DOM to handle edge cases
|
||||
this.preprocessHtmlForImport(document);
|
||||
|
||||
// Patch global environment for Prosemirror DOMParser
|
||||
const cleanup = ProsemirrorHelper.patchGlobalEnv(dom.window);
|
||||
|
||||
try {
|
||||
const domParser = ProsemirrorDOMParser.fromSchema(schema);
|
||||
return domParser.parse(document.body);
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
public static csvToMarkdown(content: Buffer | string): Promise<string> {
|
||||
/**
 * Preprocesses the HTML DOM before Prosemirror parsing to clean up images.
 * The passed document is mutated in place:
 *
 * - images whose class contains "emoticon" are removed,
 * - images with the class "icon" whose parent class contains
 *   "jira-issue-key" are removed,
 * - images that carry `data-width`, `data-height` and `width` attributes get
 *   a `height` attribute scaled to keep the original aspect ratio.
 *
 * @param document The DOM document to preprocess.
 */
private static preprocessHtmlForImport(document: Document): void {
  document.querySelectorAll("img").forEach((img) => {
    const className = img.className || "";

    // Emoticon images are dropped entirely
    if (className.includes("emoticon")) {
      img.remove();
      return;
    }

    // Remove Jira icon images
    if (
      className === "icon" &&
      img.parentElement?.className.includes("jira-issue-key")
    ) {
      img.remove();
      return;
    }

    // Handle Confluence image sizing: data-width/data-height → width/height
    const naturalWidth = parseInt(img.getAttribute("data-width") ?? "", 10);
    const naturalHeight = parseInt(img.getAttribute("data-height") ?? "", 10);
    const width = parseInt(img.getAttribute("width") ?? "", 10);

    // Only scale when every value is a positive number. Missing, zero or
    // non-numeric attributes (e.g. width="auto") would otherwise produce a
    // height of "NaN" or "Infinity".
    if (!(naturalWidth > 0 && naturalHeight > 0 && width > 0)) {
      return;
    }

    const ratio = naturalWidth / width;
    const calculatedHeight = Math.round(naturalHeight / ratio);
    img.setAttribute("height", String(calculatedHeight));
  });
}
|
||||
|
||||
/**
 * Attempts to convert content to HTML for formats that support it.
 * Returns undefined for formats that should be parsed as markdown directly.
 *
 * The mime type is checked first; if it is not recognised the file extension
 * is used instead, compared case-insensitively so that e.g. "PAGE.HTML" is
 * handled the same as "page.html".
 *
 * @param content The content of the file.
 * @param fileName The name of the file, including extension.
 * @param mimeType The mime type of the file.
 * @returns HTML string if convertible, undefined otherwise.
 * @throws FileImportError if a Word document cannot be converted.
 */
private static async convertToHtml(
  content: Buffer | string,
  fileName: string,
  mimeType: string
): Promise<string | undefined> {
  // First try to convert based on the mime type
  switch (mimeType) {
    case "text/html":
      return this.bufferToString(content);
    case "application/msword":
      return this.confluenceToHtml(content);
    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
      return this.docxToHtml(content);
    default:
      break;
  }

  // Try to convert based on the file extension, ignoring case
  const extension = fileName.split(".").pop()?.toLowerCase();
  switch (extension) {
    case "html":
      return this.bufferToString(content);
    case "docx":
      return this.docxToHtml(content);
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Converts content to markdown for text-based formats.
 *
 * The mime type is checked first; if it is not recognised the file extension
 * is used instead, compared case-insensitively so that e.g. "README.MD" is
 * handled the same as "readme.md".
 *
 * @param content The content of the file.
 * @param fileName The name of the file, including extension.
 * @param mimeType The mime type of the file.
 * @returns Markdown string.
 * @throws FileImportError if neither the mime type nor the extension is supported.
 */
private static async convertToMarkdown(
  content: Buffer | string,
  fileName: string,
  mimeType: string
): Promise<string> {
  switch (mimeType) {
    case "text/plain":
    case "text/markdown":
      return this.bufferToString(content);
    case "text/csv":
      return this.csvToMarkdown(content);
    default:
      break;
  }

  // Fall back to the file extension, ignoring case
  const extension = fileName.split(".").pop()?.toLowerCase();
  switch (extension) {
    case "md":
    case "markdown":
      return this.bufferToString(content);
    default:
      throw FileImportError(`File type ${mimeType} not supported`);
  }
}
|
||||
|
||||
/**
 * Turns a docx file into HTML by running it through mammoth.
 *
 * @param content The docx file content; only Buffer input is accepted.
 * @returns The HTML produced by mammoth for the document.
 * @throws FileImportError if the content is not a Buffer.
 */
private static async docxToHtml(content: Buffer | string): Promise<string> {
  // String input cannot be handed to mammoth as a buffer, reject it up front
  if (!(content instanceof Buffer)) {
    throw FileImportError("Unsupported Word file");
  }

  const tracedConvert = traceFunction({ spanName: "convertToHtml" })(
    mammoth.convertToHtml
  );
  const result = await tracedConvert({ buffer: content });
  return result.value;
}
|
||||
|
||||
/**
 * Convert a Confluence Word export to HTML.
 *
 * Attachments of the multi-part message are inlined into the HTML as base64
 * data URIs, using the attachment's declared image content type where
 * available and falling back to "image/png" otherwise.
 *
 * @param content The Confluence Word export content.
 * @returns The HTML representation of the document.
 * @throws FileImportError if the content is not a Confluence export or has no HTML part.
 */
private static async confluenceToHtml(
  content: Buffer | string
): Promise<string> {
  const source =
    typeof content === "string" ? content : content.toString("utf8");

  // We're only supporting the output from Confluence here, regular Word documents should call
  // into the docxToHtml importer. See: https://jira.atlassian.com/browse/CONFSERVER-38237
  if (!source.includes("Content-Type: multipart/related")) {
    throw FileImportError("Unsupported Word file");
  }

  // Confluence "Word" documents are actually just multi-part email messages, so we can use
  // mailparser to parse the content.
  const parsed = await simpleParser(source);
  if (!parsed.html) {
    throw FileImportError("Unsupported Word file (No content found)");
  }

  let html = parsed.html;

  // Replace the content-location with a data URI for each attachment.
  for (const attachment of parsed.attachments) {
    const contentLocation = String(
      attachment.headers.get("content-location") ?? ""
    );

    const id = contentLocation.split("/").pop();
    if (!id) {
      continue;
    }

    // Keep the declared type for images (jpeg, gif, svg, …) rather than
    // labelling everything as PNG; anything else keeps the previous default.
    const declaredType = attachment.contentType ?? "";
    const mimeType = /^image\/[\w.+-]+$/i.test(declaredType)
      ? declaredType
      : "image/png";
    const dataUri = `data:${mimeType};base64,${attachment.content.toString(
      "base64"
    )}`;

    // A replacer function avoids any special handling of "$" sequences in
    // the replacement text.
    html = html.replace(new RegExp(escapeRegExp(id), "g"), () => dataUri);
  }

  return html;
}
|
||||
|
||||
/**
|
||||
* Convert a CSV file to a markdown table.
|
||||
*
|
||||
* @param content The CSV file content.
|
||||
* @returns A markdown table representation.
|
||||
*/
|
||||
private static csvToMarkdown(content: Buffer | string): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const text = this.fileToMarkdown(content).trim();
|
||||
const firstLine = text.split("\n")[0];
|
||||
const text = this.bufferToString(content).trim();
|
||||
const textLines = text.split("\n");
|
||||
|
||||
// Find the first non-empty line to determine the delimiter
|
||||
const firstNonEmptyLine =
|
||||
textLines.find((line) => line.trim().length > 0) || "";
|
||||
|
||||
// Determine the separator used in the CSV file based on number of occurrences of each separator on first line
|
||||
const delimiter = [";", ",", "\t"].reduce(
|
||||
(acc, separator) => {
|
||||
const count = (
|
||||
firstLine.match(new RegExp(escapeRegExp(separator), "g")) || []
|
||||
firstNonEmptyLine.match(new RegExp(escapeRegExp(separator), "g")) ||
|
||||
[]
|
||||
).length;
|
||||
return count > acc.count ? { count, separator } : acc;
|
||||
},
|
||||
@@ -121,9 +323,64 @@ export class DocumentConverter {
|
||||
})
|
||||
.on("data", (row) => lines.push(row))
|
||||
.on("end", () => {
|
||||
const headers = lines[0];
|
||||
const table = lines
|
||||
.slice(1)
|
||||
// Filter out completely empty rows
|
||||
const nonEmptyLines = lines.filter((row) =>
|
||||
row.some((cell) => cell.trim() !== "")
|
||||
);
|
||||
|
||||
if (nonEmptyLines.length === 0) {
|
||||
resolve("");
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if all rows have a trailing empty cell (trailing comma artifact)
|
||||
// Only trim if ALL non-empty rows end with an empty cell
|
||||
let trimmedLines = nonEmptyLines;
|
||||
while (
|
||||
trimmedLines.length > 0 &&
|
||||
trimmedLines.every(
|
||||
(row) => row.length > 0 && row[row.length - 1].trim() === ""
|
||||
)
|
||||
) {
|
||||
trimmedLines = trimmedLines.map((row) => row.slice(0, -1));
|
||||
}
|
||||
|
||||
// Find the most common column count
|
||||
const columnCounts = new Map<number, number>();
|
||||
for (const row of trimmedLines) {
|
||||
if (row.length > 0) {
|
||||
columnCounts.set(
|
||||
row.length,
|
||||
(columnCounts.get(row.length) || 0) + 1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the column count that appears most frequently
|
||||
let expectedColumns = 0;
|
||||
let maxFrequency = 0;
|
||||
for (const [count, frequency] of columnCounts) {
|
||||
if (frequency > maxFrequency) {
|
||||
maxFrequency = frequency;
|
||||
expectedColumns = count;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the first row with the expected column count (this is the header)
|
||||
const headerIndex = trimmedLines.findIndex(
|
||||
(row) => row.length === expectedColumns
|
||||
);
|
||||
if (headerIndex === -1) {
|
||||
resolve("");
|
||||
return;
|
||||
}
|
||||
|
||||
const headers = trimmedLines[headerIndex];
|
||||
const dataRows = trimmedLines
|
||||
.slice(headerIndex + 1)
|
||||
.filter((row) => row.length === expectedColumns);
|
||||
|
||||
const table = dataRows
|
||||
.map((cells) => `| ${cells.join(" | ")} |`)
|
||||
.join("\n");
|
||||
|
||||
@@ -138,54 +395,13 @@ export class DocumentConverter {
|
||||
});
|
||||
}
|
||||
|
||||
public static fileToMarkdown(content: Buffer | string) {
|
||||
if (typeof content !== "string") {
|
||||
content = content.toString("utf8");
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
public static async confluenceToMarkdown(content: Buffer | string) {
|
||||
if (typeof content !== "string") {
|
||||
content = content.toString("utf8");
|
||||
}
|
||||
|
||||
// We're only supporting the output from Confluence here, regular Word documents should call
|
||||
// into the docxToMarkdown importer. See: https://jira.atlassian.com/browse/CONFSERVER-38237
|
||||
if (!content.includes("Content-Type: multipart/related")) {
|
||||
throw FileImportError("Unsupported Word file");
|
||||
}
|
||||
|
||||
// Confluence "Word" documents are actually just multi-part email messages, so we can use
|
||||
// mailparser to parse the content.
|
||||
const parsed = await simpleParser(content);
|
||||
if (!parsed.html) {
|
||||
throw FileImportError("Unsupported Word file (No content found)");
|
||||
}
|
||||
|
||||
// Replace the content-location with a data URI for each attachment.
|
||||
for (const attachment of parsed.attachments) {
|
||||
const contentLocation = String(
|
||||
attachment.headers.get("content-location") ?? ""
|
||||
);
|
||||
|
||||
const id = contentLocation.split("/").pop();
|
||||
if (!id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
parsed.html = parsed.html.replace(
|
||||
new RegExp(escapeRegExp(id), "g"),
|
||||
`data:image/png;base64,${attachment.content.toString("base64")}`
|
||||
);
|
||||
}
|
||||
|
||||
// If we don't remove the title here it becomes printed in the document
|
||||
// body by turndown
|
||||
turndownService.remove(["style", "title"]);
|
||||
|
||||
// Now we should have something that looks like HTML
|
||||
const html = turndownService.turndown(parsed.html);
|
||||
return html.replace(/<br>/g, " \\n ");
|
||||
/**
 * Normalises file content to a string.
 *
 * @param content The content, either already a string or a Buffer.
 * @returns The string unchanged, or the Buffer decoded as UTF-8.
 */
private static bufferToString(content: Buffer | string): string {
  if (typeof content === "string") {
    return content;
  }
  return content.toString("utf8");
}
|
||||
}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * Turndown plugin that registers a "breaks" rule, replacing every `<br>`
 * element with an escaped newline sequence (backslash followed by "n").
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function breaks(turndownService: TurndownService) {
  turndownService.addRule("breaks", {
    filter: ["br"],
    // The output is the two characters "\" and "n", not a real line break
    replacement: () => "\\n",
  });
}
|
||||
@@ -1,22 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * Turndown plugin that registers an "empty-lists" rule. A list item whose
 * only child node is a nested `<ol>` or `<ul>` is unwrapped: its converted
 * content is emitted as-is, without a list marker of its own.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function emptyLists(turndownService: TurndownService) {
  turndownService.addRule("empty-lists", {
    filter: (node) => {
      // Only list items with exactly one child node are candidates
      if (node.nodeName !== "LI" || node.childNodes.length !== 1) {
        return false;
      }
      const childName = node.firstChild?.nodeName;
      return childName === "OL" || childName === "UL";
    },
    replacement: (content) => content,
  });
}
|
||||
@@ -1,22 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * Turndown plugin that registers an "emptyParagraphs" rule. A `<p>` whose
 * single child element is a `<br>` and whose text is blank is replaced with
 * a fixed newline / backslash sequence instead of an empty paragraph.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function emptyParagraphs(turndownService: TurndownService) {
  turndownService.addRule("emptyParagraphs", {
    filter: (node) => {
      // Must be a paragraph with exactly one child element
      if (node.nodeName !== "P" || node.children.length !== 1) {
        return false;
      }
      const isBlank = node.textContent?.trim() === "";
      return isBlank && node.children[0].nodeName === "BR";
    },
    replacement: () => "\n\n\\\n",
  });
}
|
||||
@@ -1,21 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * Turndown plugin that registers a "frames" rule, turning `<iframe>`
 * elements into markdown links. The link text is the iframe title when one
 * is present, otherwise the source URL; iframes without a source produce no
 * output. Note that the function itself is named `images` although the rule
 * it registers handles iframes.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function images(turndownService: TurndownService) {
  turndownService.addRule("frames", {
    filter: "iframe",
    replacement: (_content, node: HTMLIFrameElement) => {
      // Newlines inside the URL are stripped before it is used
      const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
      if (!src) {
        return "";
      }
      const title = cleanAttribute(node.getAttribute("title") || "");
      const label = title || src;
      return "[" + label + "]" + "(" + src + ")";
    },
  });
}
|
||||
|
||||
/**
 * Collapses each run of newlines (together with the whitespace that follows
 * them) in an attribute value into a single newline.
 *
 * @param attribute The raw attribute value.
 * @returns The cleaned value, or an empty string for empty input.
 */
function cleanAttribute(attribute: string) {
  if (!attribute) {
    return "";
  }
  return attribute.replace(/(\n+\s*)+/g, "\n");
}
|
||||
@@ -1,50 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * A turndown plugin overriding the inbuilt image parsing behavior.
 *
 * Registers an "image" rule that:
 * - ignores images whose class contains "emoticon" (they do not match the filter),
 * - drops "icon" images inside a "jira-issue-key" parent,
 * - emits `![alt](src "title =WxH")`, where the size suffix is only added
 *   when `data-width`, `data-height` and `width` are all positive numbers.
 *
 * @param turndownService The TurndownService instance.
 */
export default function images(turndownService: TurndownService) {
  turndownService.addRule("image", {
    filter(node) {
      return node.nodeName === "IMG" && !node?.className.includes("emoticon");
    },
    replacement(content, node) {
      if (!("className" in node)) {
        return content;
      }

      // Remove icons in issue keys as they will not resolve correctly and mess
      // up the layout.
      if (
        node.className === "icon" &&
        node.parentElement?.className.includes("jira-issue-key")
      ) {
        return "";
      }

      const alt = cleanAttribute(node.getAttribute("alt") || "");
      const src = cleanAttribute(node.getAttribute("src") || "");
      const title = cleanAttribute(node.getAttribute("title") || "");

      // Without a source there is nothing to link to
      if (!src) {
        return "";
      }

      // Respect embedded Confluence image size. The size defaults to an empty
      // string so that a title without size information is not suffixed with
      // the text "undefined".
      let size = "";
      const naturalWidth = parseInt(node.getAttribute("data-width") ?? "", 10);
      const naturalHeight = parseInt(
        node.getAttribute("data-height") ?? "",
        10
      );
      const width = parseInt(node.getAttribute("width") ?? "", 10);

      if (naturalWidth > 0 && naturalHeight > 0 && width > 0) {
        const ratio = naturalWidth / width;
        // Round so the size is always expressed in whole pixels
        size = ` =${width}x${Math.round(naturalHeight / ratio)}`;
      }

      const titlePart = title || size ? ` "${title}${size}"` : "";

      return `![${alt}](${src}${titlePart})`;
    },
  });
}
|
||||
|
||||
/**
 * Removes every newline from an attribute value and trims surrounding
 * whitespace.
 *
 * @param attribute The raw attribute value.
 * @returns The cleaned value, or an empty string for an empty input.
 */
function cleanAttribute(attribute: string) {
  const withoutNewlines = attribute ? attribute.replace(/\n+/g, "") : "";
  return withoutNewlines.trim();
}
|
||||
@@ -1,47 +0,0 @@
|
||||
import { taskListItems, strikethrough } from "@joplin/turndown-plugin-gfm";
|
||||
import TurndownService from "turndown";
|
||||
import { escape } from "@shared/utils/markdown";
|
||||
import breaks from "./breaks";
|
||||
import emptyLists from "./emptyLists";
|
||||
import emptyParagraph from "./emptyParagraph";
|
||||
import frames from "./frames";
|
||||
import images from "./images";
|
||||
import inlineLink from "./inlineLink";
|
||||
import sanitizeLists from "./sanitizeLists";
|
||||
import sanitizeTables from "./sanitizeTables";
|
||||
import tables from "./tables";
|
||||
import underlines from "./underlines";
|
||||
import { inHtmlContext } from "./utils";
|
||||
|
||||
/**
 * Turndown converts HTML to Markdown and is used in the importer code.
 *
 * For options, see: https://github.com/domchristie/turndown#options
 */
const service = new TurndownService({
  hr: "---",
  bulletListMarker: "-",
  headingStyle: "atx",
  codeBlockStyle: "fenced",
  // Blank paragraphs outside of table cells become an escaped break; every
  // other blank node is dropped.
  blankReplacement: (_, node) =>
    node.nodeName === "P" && !inHtmlContext(node as HTMLElement, "td, th")
      ? "\n\n\\\n"
      : "",
})
  .remove(["script", "style", "title", "head"])
  .use(taskListItems)
  .use(strikethrough)
  .use(tables)
  .use(inlineLink)
  .use(emptyParagraph)
  .use(sanitizeTables)
  .use(sanitizeLists)
  .use(underlines)
  .use(frames)
  .use(images)
  .use(breaks)
  .use(emptyLists);

// Replace the built-in escaping with the shared markdown escape helper.
service.escape = escape;

export default service;
|
||||
@@ -1,22 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * A turndown plugin for converting anchors to inline links without a title.
 *
 * Only applies when the `linkStyle` option is "inlined" and the anchor has a
 * non-empty `href`.
 *
 * @param turndownService The TurndownService instance.
 */
export default function underlines(turndownService: TurndownService) {
  turndownService.addRule("inlineLink", {
    filter(node, options) {
      if (options.linkStyle !== "inlined" || node.nodeName !== "A") {
        return false;
      }
      return !!node.getAttribute("href");
    },
    replacement(content, node: HTMLElement) {
      const href = node.getAttribute("href");
      return "[" + content + "](" + href + ")";
    },
  });
}
|
||||
@@ -1,64 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
import { inHtmlContext } from "./utils";
|
||||
|
||||
/**
 * A turndown plugin for removing incompatible nodes from lists.
 *
 * Registers three rules:
 * - `listItem`: renders list items with a single space after the marker.
 * - `headingsInLists`: renders headings inside list items as strong text.
 * - `strongInHeadings`: drops strong/bold wrappers nested inside headings.
 *
 * @param turndownService The TurndownService instance.
 */
export default function sanitizeLists(turndownService: TurndownService) {
  const headingTags = ["H1", "H2", "H3", "H4", "H5", "H6"];

  // Fork of default functionality to only use a single space between marker and content
  // See: https://github.com/mixmark-io/turndown/blob/cc73387fb707e5fb5e1083e94078d08f38f3abc8/src/commonmark-rules.js#L61
  turndownService.addRule("listItem", {
    filter: "li",

    replacement(content, node, options) {
      content = content
        .replace(/^\n+/, "") // remove leading newlines
        .replace(/\n+$/, "\n") // replace trailing newlines with just a single one
        .replace(/\n/gm, "\n    "); // 4 space indent

      let prefix = options.bulletListMarker + " ";
      const parent = node.parentNode;
      if (parent && parent.nodeName === "OL") {
        const startAttribute = (parent as HTMLElement).getAttribute("start");
        const index = Array.prototype.indexOf.call(parent.children, node);
        // Fall back to 1 when `start` is missing or not a number so the
        // marker never becomes "NaN.".
        const parsedStart = startAttribute ? Number(startAttribute) : NaN;
        const first = Number.isFinite(parsedStart) ? parsedStart : 1;
        prefix = first + index + ". ";
      }

      const needsNewline = node.nextSibling && !/\n$/.test(content);
      return prefix + content + (needsNewline ? "\n" : "");
    },
  });

  turndownService.addRule("headingsInLists", {
    filter(node) {
      return headingTags.includes(node.nodeName) && inHtmlContext(node, "LI");
    },
    replacement(content, node, options) {
      if (!content.trim()) {
        return "";
      }
      return options.strongDelimiter + content + options.strongDelimiter;
    },
  });

  turndownService.addRule("strongInHeadings", {
    filter(node) {
      return (
        (node.nodeName === "STRONG" || node.nodeName === "B") &&
        headingTags.some((tag) => inHtmlContext(node, tag))
      );
    },
    replacement(content) {
      return content;
    },
  });
}
|
||||
@@ -1,30 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
import { inHtmlContext } from "./utils";
|
||||
|
||||
/**
 * A turndown plugin for removing incompatible nodes from tables.
 *
 * Registers two rules:
 * - `headingsInTables`: renders headings inside a table as bold text.
 * - `paragraphsInCells`: trims paragraphs inside a table and separates them
 *   with a literal backslash-n sequence.
 *
 * @param turndownService The TurndownService instance.
 */
export default function sanitizeTables(turndownService: TurndownService) {
  turndownService.addRule("headingsInTables", {
    filter(node) {
      return (
        ["H1", "H2", "H3", "H4", "H5", "H6"].includes(node.nodeName) &&
        inHtmlContext(node, "table")
      );
    },
    replacement(content) {
      const text = content.trim();
      // An empty heading would otherwise render as "****".
      return text ? `**${text}**` : "";
    },
  });

  turndownService.addRule("paragraphsInCells", {
    filter(node) {
      return node.nodeName === "P" && inHtmlContext(node, "table");
    },
    replacement(content, node) {
      // The separator is the two characters backslash + n, not a newline.
      return content.trim() + (node.nextSibling ? "\\n" : "");
    },
  });
}
|
||||
@@ -1,325 +0,0 @@
|
||||
// Based on https://www.npmjs.com/package/joplin-turndown-plugin-gfm
|
||||
import type TurndownService from "turndown";
|
||||
import { inHtmlContext } from "./utils";
|
||||
|
||||
// Turndown rules registered by this plugin, keyed by rule name.
const rules: Record<string, TurndownService.Rule> = {};

// Delimiter-row marker used for each supported column alignment.
const alignMap = { left: ":---", right: "---:", center: ":---:" };

// Note use of WeakMap to enable garbage collection
const tableShouldBeSkippedCache = new WeakMap<HTMLTableElement, boolean>();
|
||||
|
||||
/**
 * Reads the alignment declared on a node, preferring the `align` attribute
 * over the inline `text-align` style.
 *
 * @param node The node to inspect.
 * @returns The lower-cased alignment, or an empty string when the node is
 * missing or declares none.
 */
function getAlignment(node: HTMLElement) {
  if (!node) {
    return "";
  }
  const declared = node.getAttribute("align") || node.style.textAlign || "";
  return declared.toLowerCase() as "left" | "right" | "center";
}
|
||||
|
||||
/**
 * Maps an alignment to the marker used in the table delimiter row.
 *
 * @param alignment The column alignment.
 * @returns The matching entry of `alignMap`, or "---" for an empty alignment.
 */
function getBorder(alignment: keyof typeof alignMap) {
  if (!alignment) {
    return "---";
  }
  return alignMap[alignment];
}
|
||||
|
||||
/**
 * Picks the alignment of a column by letting every row's cell in that column
 * vote. Starts from "left" and switches only to an alignment known to
 * `alignMap` whose vote count strictly exceeds the current choice.
 *
 * @param table The table to inspect, if any.
 * @param columnIndex The zero-based index of the column.
 * @returns The chosen alignment; "left" when there is no table.
 */
function getColumnAlignment(
  table: HTMLTableElement | null,
  columnIndex: number
) {
  let align: keyof typeof alignMap = "left";
  if (!table) {
    return align;
  }

  const votes = {
    left: 0,
    right: 0,
    center: 0,
    "": 0,
  };
  const knownAlignments = Object.keys(alignMap);

  // Reference is important as .rows is an expensive getter.
  const rows = table.rows;

  for (let i = 0; i < rows.length; ++i) {
    const cells = rows[i].childNodes;
    if (!(columnIndex < cells.length)) {
      continue;
    }

    const cellAlignment = getAlignment(cells[columnIndex] as HTMLElement);
    ++votes[cellAlignment];

    if (
      votes[cellAlignment] > votes[align] &&
      knownAlignments.includes(cellAlignment)
    ) {
      align = cellAlignment as keyof typeof alignMap;
    }
  }

  return align;
}
|
||||
|
||||
// Header and data cells: rendered as a pipe-delimited cell unless the
// enclosing table is skipped, in which case the content passes through.
rules.tableCell = {
  filter: ["th", "td"],
  replacement(content, node: HTMLTableCellElement) {
    if (tableShouldBeSkipped(nodeParentTable(node))) {
      return content;
    }
    return cell(content, node);
  },
};
|
||||
|
||||
// Rows: each row starts on a new line; a heading row is followed by the
// delimiter row, one border cell per table column.
rules.tableRow = {
  filter: "tr",
  replacement(content, node: HTMLTableRowElement) {
    const parentTable = nodeParentTable(node);
    if (tableShouldBeSkipped(parentTable)) {
      return content;
    }

    let borderCells = "";

    if (isHeadingRow(node)) {
      const colCount = tableColCount(parentTable);
      for (let i = 0; i < colCount; i++) {
        // The row may have fewer cells than the widest row of the table.
        const childNode =
          i < node.childNodes.length ? node.childNodes[i] : null;
        const border = getBorder(getColumnAlignment(parentTable, i));
        borderCells += cell(border, childNode, i);
      }
    }

    return "\n" + content + (borderCells ? "\n" + borderCells : "");
  },
};
|
||||
|
||||
rules.table = {
  // Only convert tables that can result in valid Markdown
  // Other tables are kept as HTML using `keep` (see below).
  filter(node) {
    return node.nodeName === "TABLE" && !tableShouldBeHtml(node);
  },

  replacement(content, node: HTMLTableElement) {
    if (tableShouldBeSkipped(node)) {
      return content;
    }

    // Ensure there are no blank lines
    content = content.replace(/\n+/g, "\n");

    // If table has no heading, add an empty one so as to get a valid Markdown table
    const lines = content.trim().split("\n");
    const secondLine = lines.length >= 2 ? lines[1] : "";
    const secondLineIsDivider = /\| :?---/.test(secondLine);

    const columnCount = tableColCount(node);
    let emptyHeader = "";
    if (columnCount && !secondLineIsDivider) {
      // One blank header row, then the start of the delimiter row.
      emptyHeader = "|" + " |".repeat(columnCount) + "\n" + "|";
      for (let columnIndex = 0; columnIndex < columnCount; ++columnIndex) {
        emptyHeader +=
          " " + getBorder(getColumnAlignment(node, columnIndex)) + " |";
      }
    }

    return "\n\n" + emptyHeader + content + "\n\n";
  },
};
|
||||
|
||||
// Table sections add nothing of their own; their rows pass straight through.
rules.tableSection = {
  filter: ["thead", "tbody", "tfoot"],
  replacement(content) {
    return content;
  },
};
|
||||
|
||||
/**
 * A tr is a heading row if its parent is a THEAD, or if every one of its
 * child nodes is a TH. A row without a parent is never a heading row; a row
 * with a parent and no child nodes counts as one, because the "every" check
 * is vacuously true.
 *
 * @param tr The tr node to check
 * @returns Whether the tr is a heading row
 */
function isHeadingRow(tr: Node) {
  const parentNode = tr.parentNode;
  if (!parentNode) {
    return false;
  }
  if (parentNode.nodeName === "THEAD") {
    return true;
  }
  return Array.from(tr.childNodes).every((n) => n.nodeName === "TH");
}
|
||||
|
||||
/**
 * Renders one table cell as Markdown.
 *
 * The content is trimmed, line breaks become `<br>`, every pipe is escaped,
 * and the result is padded to at least three characters. The first cell of a
 * row is prefixed with the opening pipe.
 *
 * @param content The already-converted cell content.
 * @param node The cell node, used to find the column index and colspan.
 * @param index The column index; looked up from `node` when omitted.
 * @returns The Markdown for the cell, ending with " |".
 */
function cell(
  content: string,
  node: ChildNode | null = null,
  index: number | null = null
) {
  if (index === null && node) {
    index = Array.from(node?.parentNode?.childNodes ?? []).indexOf(node);
  }
  const prefix = index === 0 ? "| " : " ";

  let filteredContent = content
    .trim()
    // Treat CRLF (and LFCR) as one break so no stray carriage return is left.
    .replace(/\r\n|\n\r|\r|\n/g, "<br>")
    // Escape each pipe individually so runs such as "||" keep every character.
    .replace(/\|/g, "\\|")
    .padEnd(3, " ");

  if (node) {
    filteredContent = handleColSpan(filteredContent, node, " ");
  }
  return prefix + filteredContent + " |";
}
|
||||
|
||||
/**
 * Checks whether any descendant of a node is a TABLE element.
 *
 * @param node The node whose descendants are searched.
 * @returns True when a TABLE is found at any depth below the node.
 */
function nodeContainsTable(node: Node): boolean {
  if (!node?.childNodes) {
    return false;
  }
  return Array.from(node.childNodes).some(
    (child) => child.nodeName === "TABLE" || nodeContainsTable(child)
  );
}
|
||||
|
||||
/**
 * Checks whether any descendant of a node matches the given types.
 *
 * A descendant matches when `types.includes` accepts its `nodeName` verbatim
 * (the comparison is case-sensitive), or, when `types` is exactly the string
 * "code", when the descendant sits in a CODE context.
 *
 * @param node The node whose descendants are searched.
 * @param types A node name, or list of node names, to look for.
 * @returns True when a matching descendant exists at any depth.
 */
const nodeContains = (
  node: HTMLElement,
  types: string | string[]
): boolean => {
  if (!node?.childNodes) {
    return false;
  }

  for (let i = 0; i < node.childNodes.length; i++) {
    const child = node.childNodes[i] as HTMLElement;
    if (types === "code" && inHtmlContext(child, "CODE")) {
      return true;
    }
    if (types.includes(child.nodeName)) {
      return true;
    }
    if (nodeContains(child, types)) {
      return true;
    }
  }

  return false;
};
|
||||
|
||||
/**
 * Decides whether a table is kept as raw HTML instead of being converted.
 *
 * NOTE(review): the entries are lower-case, while other checks in this file
 * compare `nodeName` against upper-case names such as "TABLE" — confirm that
 * these entries can ever match.
 *
 * @param tableNode The table element to inspect.
 * @returns Whether `nodeContains` finds one of the listed types inside it.
 */
const tableShouldBeHtml = (tableNode: HTMLElement) =>
  nodeContains(tableNode, ["code", "table"]);
|
||||
|
||||
/**
 * Various conditions under which a table should be skipped - i.e. each cell
 * will be rendered one after the other as if they were paragraphs.
 *
 * A table is skipped when it is missing, has no rows collection, consists of
 * a single row with at most one child node, or contains another table. The
 * answer is cached per table node.
 *
 * @param tableNode The table to check, if any.
 * @returns Whether the table should be skipped.
 */
function tableShouldBeSkipped(tableNode: HTMLTableElement | null) {
  if (!tableNode) {
    return true;
  }

  const cached = tableShouldBeSkippedCache.get(tableNode);
  if (cached !== undefined) {
    return cached;
  }

  // Reference is important as .rows is an expensive getter.
  const rows = tableNode.rows;

  const result =
    !rows ||
    (rows.length === 1 && rows[0].childNodes.length <= 1) ||
    nodeContainsTable(tableNode);

  tableShouldBeSkippedCache.set(tableNode, result);
  return result;
}
|
||||
|
||||
/**
 * Finds the closest TABLE ancestor of a cell or row.
 *
 * @param node The cell or row to start from.
 * @returns The enclosing table, or null when the node is not inside one.
 */
function nodeParentTable(
  node: HTMLTableCellElement | HTMLTableRowElement
): HTMLTableElement | null {
  for (
    let ancestor = node.parentNode;
    ancestor;
    ancestor = ancestor.parentNode
  ) {
    if (ancestor.nodeName === "TABLE") {
      return ancestor as HTMLTableElement;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Appends one empty cell to the content for every extra column that the
 * node's `colspan` attribute covers.
 *
 * @param content The rendered cell content.
 * @param node The cell node carrying the `colspan` attribute.
 * @param emptyChar The character repeated three times to fill an empty cell.
 * @returns The content followed by the empty cells, or the content unchanged
 * when the node is missing, cannot carry attributes, or spans one column.
 */
function handleColSpan(content: string, node: ChildNode, emptyChar: string) {
  // Non-element children (e.g. text nodes) have no attributes to read.
  if (!node || !("getAttribute" in node)) {
    return content;
  }

  const colspan = Number((node as HTMLElement).getAttribute("colspan") || 1);
  // A non-numeric or infinite colspan adds nothing rather than looping forever.
  if (!Number.isFinite(colspan) || colspan <= 1) {
    return content;
  }

  const emptyCell = " | " + emptyChar.repeat(3);
  return content + emptyCell.repeat(Math.ceil(colspan) - 1);
}
|
||||
|
||||
/**
 * Counts the columns of a table as the largest number of child nodes found
 * in any of its rows.
 *
 * @param node The table to measure, if any.
 * @returns The column count; 0 when there is no table or it has no rows.
 */
function tableColCount(node: HTMLTableElement | null) {
  if (!node) {
    return 0;
  }

  // Reference is important as .rows is an expensive getter.
  const rows = node.rows;

  let maxColCount = 0;
  for (let i = 0; i < rows.length; i++) {
    maxColCount = Math.max(maxColCount, rows[i].childNodes.length);
  }
  return maxColCount;
}
|
||||
|
||||
/**
 * A turndown plugin for converting tables.
 *
 * Tables for which `tableShouldBeHtml` is true are kept as HTML; every rule
 * collected in `rules` is then registered under its key.
 *
 * @param turndownService The TurndownService instance.
 */
export default function tables(turndownService: TurndownService) {
  turndownService.keep(
    (node) => node.nodeName === "TABLE" && tableShouldBeHtml(node)
  );

  for (const [key, rule] of Object.entries(rules)) {
    turndownService.addRule(key, rule);
  }
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import type TurndownService from "turndown";
|
||||
|
||||
/**
 * A turndown plugin for converting u tags to underlines.
 *
 * The trimmed content is wrapped in double underscores; content that is empty
 * after trimming produces no output instead of a bare "____".
 *
 * @param turndownService The TurndownService instance.
 */
export default function underlines(turndownService: TurndownService) {
  turndownService.addRule("underlines", {
    filter: ["u"],
    replacement(content) {
      const text = content.trim();
      return text ? `__${text}__` : "";
    },
  });
}
|
||||
@@ -1,13 +0,0 @@
|
||||
/**
 * Checks whether a node is, or sits inside, an element matching a selector.
 *
 * @param node The node to start from; non-element nodes defer to their parent.
 * @param selector The selector handed to `closest`.
 * @returns True when the closest element exists and `closest` finds a match.
 */
export function inHtmlContext(node: HTMLElement, selector: string) {
  let element: HTMLElement | null = node;
  // start at the closest element
  while (element !== null && element.nodeType !== 1) {
    element = (element.parentElement ||
      element.parentNode) as HTMLElement | null;
  }
  if (element === null || element.nodeType !== 1) {
    return false;
  }
  return element.closest(selector) !== null;
}
|
||||
@@ -4079,13 +4079,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@joplin/turndown-plugin-gfm@npm:^1.0.49":
|
||||
version: 1.0.64
|
||||
resolution: "@joplin/turndown-plugin-gfm@npm:1.0.64"
|
||||
checksum: 10c0/cbbcba0f111e420535fc1693c1ff859ca7fae5a869a7891ec6cd9ead2f94cec8e858938dcb6ba379c160d086a7fbfadadd53cc9e79d755100a4d0b1cf77947fc
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@jridgewell/gen-mapping@npm:^0.3.12, @jridgewell/gen-mapping@npm:^0.3.5":
|
||||
version: 0.3.13
|
||||
resolution: "@jridgewell/gen-mapping@npm:0.3.13"
|
||||
@@ -4368,13 +4361,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@mixmark-io/domino@npm:^2.2.0":
|
||||
version: 2.2.0
|
||||
resolution: "@mixmark-io/domino@npm:2.2.0"
|
||||
checksum: 10c0/aa468a15f9217d425220fe6a4b3f9416cbe8e566ee14efc191c6d5cc04fe39338b16a90bbac190f28d44e69465db5f2cf95f479c621ce38060ca6b2a3d346e9d
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@msgpackr-extract/msgpackr-extract-darwin-arm64@npm:3.0.3":
|
||||
version: 3.0.3
|
||||
resolution: "@msgpackr-extract/msgpackr-extract-darwin-arm64@npm:3.0.3"
|
||||
@@ -8774,13 +8760,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/turndown@npm:^5.0.6":
|
||||
version: 5.0.6
|
||||
resolution: "@types/turndown@npm:5.0.6"
|
||||
checksum: 10c0/cc5648c115b67ba413782fd0a8ae273ad6b87940df770ab9a5fefe0303c368704013fca2a55dd08f46a2132a747912fd47f96a83162c47fd189babf1352ac4be
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/unist@npm:^2":
|
||||
version: 2.0.11
|
||||
resolution: "@types/unist@npm:2.0.11"
|
||||
@@ -17313,7 +17292,6 @@ __metadata:
|
||||
"@hocuspocus/extension-throttle": "npm:1.1.2"
|
||||
"@hocuspocus/provider": "npm:1.1.2"
|
||||
"@hocuspocus/server": "npm:1.1.2"
|
||||
"@joplin/turndown-plugin-gfm": "npm:^1.0.49"
|
||||
"@juggle/resize-observer": "npm:^3.4.0"
|
||||
"@linear/sdk": "npm:^58.1.0"
|
||||
"@node-oauth/oauth2-server": "npm:^5.2.0"
|
||||
@@ -17404,7 +17382,6 @@ __metadata:
|
||||
"@types/styled-components": "npm:^5.1.32"
|
||||
"@types/throng": "npm:^5.0.7"
|
||||
"@types/tmp": "npm:^0.2.6"
|
||||
"@types/turndown": "npm:^5.0.6"
|
||||
"@types/utf8": "npm:^3.0.3"
|
||||
"@types/validator": "npm:^13.15.3"
|
||||
"@types/yauzl": "npm:^2.10.3"
|
||||
@@ -17580,7 +17557,6 @@ __metadata:
|
||||
tiny-cookie: "npm:^2.5.1"
|
||||
tmp: "npm:^0.2.5"
|
||||
tunnel-agent: "npm:^0.6.0"
|
||||
turndown: "npm:^7.2.2"
|
||||
typescript: "npm:^5.9.2"
|
||||
ukkonen: "npm:^2.2.0"
|
||||
umzug: "npm:^3.8.2"
|
||||
@@ -21354,15 +21330,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"turndown@npm:^7.2.2":
|
||||
version: 7.2.2
|
||||
resolution: "turndown@npm:7.2.2"
|
||||
dependencies:
|
||||
"@mixmark-io/domino": "npm:^2.2.0"
|
||||
checksum: 10c0/ee09f7bd67c468505aad6c3a26b11269ca49ffce07eaa9c212926d068f242b11b4e955b31a58289f26674ff29f91209b29454907551dcaec7da712e524cc78c2
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"type-detect@npm:4.0.8":
|
||||
version: 4.0.8
|
||||
resolution: "type-detect@npm:4.0.8"
|
||||
|
||||
Reference in New Issue
Block a user