perf: Remove turndown (#11331)

* Remove turndown * Refactor htmlToProsemirror * fix: Bug in CSV import * refactor
2026-06-13 11:25:03 +03:00 · 2026-01-31 20:56:36 -05:00
parent 51dd516679
commit bb128318da
22 changed files with 909 additions and 1241 deletions
@@ -75,7 +75,6 @@
    "@hocuspocus/extension-throttle": "1.1.2",
    "@hocuspocus/provider": "1.1.2",
    "@hocuspocus/server": "1.1.2",
-    "@joplin/turndown-plugin-gfm": "^1.0.49",
    "@juggle/resize-observer": "^3.4.0",
    "@linear/sdk": "^58.1.0",
    "@node-oauth/oauth2-server": "^5.2.0",
@@ -253,7 +252,6 @@
    "tiny-cookie": "^2.5.1",
    "tmp": "^0.2.5",
    "tunnel-agent": "^0.6.0",
-    "turndown": "^7.2.2",
    "ukkonen": "^2.2.0",
    "umzug": "^3.8.2",
    "utility-types": "^3.11.0",
@@ -341,7 +339,6 @@
    "@types/styled-components": "^5.1.32",
    "@types/throng": "^5.0.7",
    "@types/tmp": "^0.2.6",
-    "@types/turndown": "^5.0.6",
    "@types/utf8": "^3.0.3",
    "@types/validator": "^13.15.3",
    "@types/yauzl": "^2.10.3",
@@ -1,16 +1,11 @@
-import emojiRegex from "emoji-regex";
 import mime from "mime-types";
-import type { Node } from "prosemirror-model";
 import truncate from "lodash/truncate";
-import parseTitle from "@shared/utils/parseTitle";
 import type { ProsemirrorData } from "@shared/types";
-import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
 import { DocumentValidation } from "@shared/validations";
 import { serializer } from "@server/editor";
 import { traceFunction } from "@server/logging/tracing";
 import type { User } from "@server/models";
 import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
-import { TextHelper } from "@server/models/helpers/TextHelper";
 import type { APIContext } from "@server/types";
 import { DocumentConverter } from "@server/utils/DocumentConverter";
 import { InvalidRequestError } from "../errors";
@@ -33,14 +28,11 @@ type ImportResult = {
 /**
 * Converts document content to state and validates size constraints.
 *
- * @param content The document content as markdown text or Prosemirror JSON.
+ * @param content The document content as Prosemirror JSON.
 * @param title The document title (used in error messages).
 * @returns The Y.Doc state buffer.
 */
-function convertToState(
-  content: string | ProsemirrorData,
-  title: string
-): Buffer {
+function convertToState(content: ProsemirrorData, title: string): Buffer {
  const ydoc = ProsemirrorHelper.toYDoc(content);
  const state = ProsemirrorHelper.toState(ydoc);

@@ -53,92 +45,6 @@ function convertToState(
  return state;
 }

-/**
- * Imports HTML content directly to Prosemirror, bypassing markdown conversion.
- */
-async function importHtml(
-  content: Buffer | string,
-  title: string,
-  user: User,
-  ctx: APIContext
-): Promise<ImportResult> {
-  let doc: Node = DocumentConverter.htmlToProsemirror(content);
-
-  // Extract title from first H1 if present
-  const headings = SharedProsemirrorHelper.getHeadings(doc);
-  if (headings.length > 0 && headings[0].level === 1) {
-    title = headings[0].title;
-    doc = ProsemirrorHelper.removeFirstHeading(doc);
-  }
-
-  // Extract emoji from start of document
-  const { emoji: icon, doc: docWithoutEmoji } =
-    ProsemirrorHelper.extractEmojiFromStart(doc);
-  doc = docWithoutEmoji;
-
-  // Replace external images with attachments
-  doc = await TextHelper.replaceImagesWithAttachmentsInNode(ctx, doc, user);
-
-  const text = serializer.serialize(doc);
-  title = truncate(title, { length: DocumentValidation.maxTitleLength });
-  const state = convertToState(doc.toJSON(), title);
-
-  return { text, state, title, icon };
-}
-
-/**
- * Imports content via markdown conversion (for docx, md, csv, etc.).
- */
-async function importMarkdown(
-  content: Buffer | string,
-  fileName: string,
-  mimeType: string,
-  title: string,
-  user: User,
-  ctx: APIContext
-): Promise<ImportResult> {
-  let text = await DocumentConverter.convertToMarkdown(
-    content,
-    fileName,
-    mimeType
-  );
-
-  // Find and extract emoji near the beginning of the document
-  const regex = emojiRegex();
-  const matches = regex.exec(text.slice(0, 10));
-  const icon = matches ? matches[0] : undefined;
-  if (icon) {
-    text = text.replace(icon, "");
-  }
-
-  // If the first line looks like a markdown heading, use it as the title
-  if (text.startsWith("# ")) {
-    const result = parseTitle(text);
-    title = result.title;
-    text = text.replace(/^.+(\n|$)/, "");
-  }
-
-  // Replace any <br> generated by turndown with escaped newlines
-  text = text.trim().replace(/<br>/gi, "\\n");
-
-  // Remove any closed and immediately reopened formatting marks
-  text = text.replace(/\*\*\*\*/gi, "").replace(/____/gi, "");
-
-  text = await TextHelper.replaceImagesWithAttachments(ctx, text, user);
-
-  // Sanity check – text cannot possibly be longer than state
-  if (text.length > DocumentValidation.maxStateLength) {
-    throw InvalidRequestError(
-      `The document "${title}" is too large to import, please reduce the length and try again`
-    );
-  }
-
-  title = truncate(title, { length: DocumentValidation.maxTitleLength });
-  const state = convertToState(text, title);
-
-  return { text, state, title, icon };
-}
-
 async function documentImporter({
  mimeType,
  fileName,
@@ -154,18 +60,40 @@ async function documentImporter({
    "html",
    ...(mime.extensions[mimeType] ?? []),
  ];
-  const title = fileName.replace(
+  const fileTitle = fileName.replace(
    new RegExp(`\\.(${extensions.join("|")})$`, "i"),
    ""
  );

-  const isHtml = mimeType === "text/html" || fileName.endsWith(".html");
+  // Convert document using unified converter
+  const {
+    doc,
+    title: extractedTitle,
+    icon,
+  } = await DocumentConverter.convert(content, fileName, mimeType);

-  if (isHtml) {
-    return importHtml(content, title, user, ctx);
+  // Use extracted title or fall back to filename
+  let title = extractedTitle || fileTitle;
+
+  // Replace external images with attachments
+  const processedDoc = await ProsemirrorHelper.replaceImagesWithAttachments(
+    ctx,
+    doc,
+    user
+  );
+
+  // Serialize final text and handle empty documents
+  let text = serializer.serialize(processedDoc).trim();
+  // Empty paragraphs serialize to escaped newlines/backslashes, treat as empty
+  if (/^[\\\s]*$/.test(text)) {
+    text = "";
  }

-  return importMarkdown(content, fileName, mimeType, title, user, ctx);
+  // Truncate title and validate size
+  title = truncate(title, { length: DocumentValidation.maxTitleLength });
+  const state = convertToState(processedDoc.toJSON() as ProsemirrorData, title);
+
+  return { text, state, title, icon };
 }

 export default traceFunction({
@@ -2,10 +2,14 @@ import { faker } from "@faker-js/faker";
 import type { DeepPartial } from "utility-types";
 import type { ProsemirrorData } from "@shared/types";
 import { MentionType } from "@shared/types";
+import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
+import { createContext } from "@server/context";
 import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
 import type { MentionAttrs } from "./ProsemirrorHelper";
 import { ProsemirrorHelper } from "./ProsemirrorHelper";

+jest.mock("@server/storage/files");
+
 describe("ProsemirrorHelper", () => {
  describe("processMentions", () => {
    it("should handle deleted users", async () => {
@@ -932,141 +936,166 @@ describe("ProsemirrorHelper", () => {
    });
  });

-  describe("htmlToProsemirror", () => {
-    it("should convert basic HTML to Prosemirror", () => {
-      const html = "<p>Hello world</p>";
+  describe("replaceImagesWithAttachments", () => {
+    it("should return the same document when there are no images", async () => {
+      const user = await buildUser();
+      const ctx = createContext({ user });

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
+      const doc = buildProseMirrorDoc([
+        {
+          type: "paragraph",
+          content: [{ type: "text", text: "No images here" }],
+        },
+      ]);

-      expect(doc.type.name).toBe("doc");
-      expect(doc.content.childCount).toBe(1);
-      expect(doc.content.child(0).type.name).toBe("paragraph");
-      expect(doc.content.child(0).textContent).toBe("Hello world");
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
+        ctx,
+        doc,
+        user
+      );
+
+      expect(result.toJSON()).toEqual(doc.toJSON());
    });

-    it("should convert HTML with heading", () => {
-      const html = "<h1>Title</h1><p>Content</p>";
+    it("should correctly identify images in a document", () => {
+      const doc = buildProseMirrorDoc([
+        {
+          type: "paragraph",
+          content: [
+            {
+              type: "image",
+              attrs: {
+                src: "https://example.com/image.png",
+                alt: "Test image",
+              },
+            },
+          ],
+        },
+      ]);

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      expect(doc.content.childCount).toBe(2);
-      expect(doc.content.child(0).type.name).toBe("heading");
-      expect(doc.content.child(0).attrs.level).toBe(1);
-      expect(doc.content.child(0).textContent).toBe("Title");
-      expect(doc.content.child(1).type.name).toBe("paragraph");
+      const images = SharedProsemirrorHelper.getImages(doc);
+      expect(images.length).toBe(1);
+      expect(images[0].attrs.src).toBe("https://example.com/image.png");
+      expect(images[0].attrs.alt).toBe("Test image");
    });

-    it("should remove script tags", () => {
-      const html = "<p>Safe content</p><script>alert('xss')</script>";
+    it("should skip images with invalid URLs", async () => {
+      const user = await buildUser();
+      const ctx = createContext({ user });

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
+      const doc = buildProseMirrorDoc([
+        {
+          type: "paragraph",
+          content: [
+            {
+              type: "image",
+              attrs: {
+                src: "not-a-valid-url",
+                alt: "Invalid",
+              },
+            },
+          ],
+        },
+      ]);

-      expect(doc.textContent).toBe("Safe content");
-      expect(doc.textContent).not.toContain("alert");
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
+        ctx,
+        doc,
+        user
+      );
+
+      // Document should remain unchanged since URL is invalid
+      expect(result.toJSON()).toEqual(doc.toJSON());
    });

-    it("should remove style tags", () => {
-      const html = "<style>body { color: red; }</style><p>Content</p>";
+    it("should skip images with internal URLs", async () => {
+      const user = await buildUser();
+      const ctx = createContext({ user });

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
+      const doc = buildProseMirrorDoc([
+        {
+          type: "paragraph",
+          content: [
+            {
+              type: "image",
+              attrs: {
+                src: "/api/attachments.redirect?id=existing-id",
+                alt: "Internal",
+              },
+            },
+          ],
+        },
+      ]);

-      expect(doc.textContent).toBe("Content");
-      expect(doc.textContent).not.toContain("color");
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
+        ctx,
+        doc,
+        user
+      );
+
+      // Document should remain unchanged since URL is internal
+      expect(result.toJSON()).toEqual(doc.toJSON());
    });

-    it("should handle Buffer input", () => {
-      const html = Buffer.from("<p>From buffer</p>", "utf8");
+    it("should handle document with multiple node types", async () => {
+      const user = await buildUser();
+      const ctx = createContext({ user });

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
+      const doc = buildProseMirrorDoc([
+        {
+          type: "heading",
+          attrs: { level: 1 },
+          content: [{ type: "text", text: "Title" }],
+        },
+        {
+          type: "paragraph",
+          content: [{ type: "text", text: "Some text" }],
+        },
+        {
+          type: "paragraph",
+          content: [
+            {
+              type: "image",
+              attrs: {
+                src: "invalid-url",
+                alt: "Image",
+              },
+            },
+          ],
+        },
+      ]);

-      expect(doc.content.child(0).textContent).toBe("From buffer");
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
+        ctx,
+        doc,
+        user
+      );
+
+      // Document structure should be preserved
+      expect(result.content.childCount).toBe(3);
+      expect(result.content.child(0).type.name).toBe("heading");
+      expect(result.content.child(1).type.name).toBe("paragraph");
+      expect(result.content.child(2).type.name).toBe("paragraph");
    });

-    it("should convert HTML with lists", () => {
-      const html = "<ul><li>Item 1</li><li>Item 2</li></ul>";
+    it("should handle empty document", async () => {
+      const user = await buildUser();
+      const ctx = createContext({ user });

-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
+      const doc = buildProseMirrorDoc([
+        {
+          type: "paragraph",
+          content: [],
+        },
+      ]);

-      expect(doc.content.childCount).toBe(1);
-      expect(doc.content.child(0).type.name).toBe("bullet_list");
-      expect(doc.content.child(0).content.childCount).toBe(2);
-    });
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
+        ctx,
+        doc,
+        user
+      );

-    it("should convert HTML with bold and italic", () => {
-      const html = "<p><strong>Bold</strong> and <em>italic</em></p>";
-
-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      const paragraph = doc.content.child(0);
-      expect(paragraph.type.name).toBe("paragraph");
-
-      // Check that marks are applied
-      const boldText = paragraph.content.child(0);
-      expect(boldText.text).toBe("Bold");
-      expect(boldText.marks.some((m) => m.type.name === "strong")).toBe(true);
-
-      const italicText = paragraph.content.child(2);
-      expect(italicText.text).toBe("italic");
-      expect(italicText.marks.some((m) => m.type.name === "em")).toBe(true);
-    });
-
-    it("should handle full HTML document", () => {
-      const html = `
-        <!DOCTYPE html>
-        <html>
-          <head>
-            <title>Test</title>
-            <meta charset="utf-8">
-          </head>
-          <body>
-            <h1>Document Title</h1>
-            <p>Paragraph content</p>
-          </body>
-        </html>
-      `;
-
-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      expect(doc.content.childCount).toBe(2);
-      expect(doc.content.child(0).type.name).toBe("heading");
-      expect(doc.content.child(0).textContent).toBe("Document Title");
-      expect(doc.content.child(1).type.name).toBe("paragraph");
-      expect(doc.content.child(1).textContent).toBe("Paragraph content");
-    });
-
-    it("should remove emoticon images", () => {
-      const html = `<p>Hello <img class="emoticon" src="smile.png" alt=":)"> world</p>`;
-
-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      // Emoticon image should be removed, text content remains
-      expect(doc.textContent).not.toContain(":)");
-      expect(doc.textContent).toContain("Hello");
-      expect(doc.textContent).toContain("world");
-    });
-
-    it("should remove Jira icon images", () => {
-      const html = `
-        <p>Issue: <span class="jira-issue-key"><img class="icon" src="icon.png">ABC-123</span></p>
-      `;
-
-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      expect(doc.textContent).toBe("Issue: ABC-123");
-    });
-
-    it("should apply Confluence image sizing", () => {
-      const html = `
-        <p><img src="image.png" data-width="800" data-height="600" width="400"></p>
-      `;
-
-      const doc = ProsemirrorHelper.htmlToProsemirror(html);
-
-      const paragraph = doc.content.child(0);
-      const image = paragraph.content.child(0);
-      expect(image.type.name).toBe("image");
-      expect(image.attrs.width).toBe(400);
-      expect(image.attrs.height).toBe(300);
+      expect(result.toJSON()).toEqual(doc.toJSON());
    });
  });
 });
@@ -1,16 +1,13 @@
 import emojiRegex from "emoji-regex";
 import { JSDOM } from "jsdom";
+import chunk from "lodash/chunk";
 import compact from "lodash/compact";
 import { EditorState } from "prosemirror-state";
 import { EditorView } from "prosemirror-view";
 import flatten from "lodash/flatten";
 import isMatch from "lodash/isMatch";
 import uniq from "lodash/uniq";
-import {
-  Node,
-  Fragment,
-  DOMParser as ProsemirrorDOMParser,
-} from "prosemirror-model";
+import { Node, Fragment } from "prosemirror-model";
 import { renderToString } from "react-dom/server";
 import styled, { ServerStyleSheet, ThemeProvider } from "styled-components";
 import { prosemirrorToYDoc } from "y-prosemirror";
@@ -22,17 +19,23 @@ import EditorContainer from "@shared/editor/components/Styles";
 import GlobalStyles from "@shared/styles/globals";
 import light from "@shared/styles/theme";
 import type { ProsemirrorData, UnfurlResponse } from "@shared/types";
-import { MentionType } from "@shared/types";
-import { attachmentRedirectRegex } from "@shared/utils/ProsemirrorHelper";
+import { AttachmentPreset, MentionType } from "@shared/types";
+import {
+  attachmentRedirectRegex,
+  ProsemirrorHelper as SharedProsemirrorHelper,
+} from "@shared/utils/ProsemirrorHelper";
 import parseDocumentSlug from "@shared/utils/parseDocumentSlug";
 import { isRTL } from "@shared/utils/rtl";
 import { isInternalUrl } from "@shared/utils/urls";
+import attachmentCreator from "@server/commands/attachmentCreator";
 import { plugins, schema, parser } from "@server/editor";
+import env from "@server/env";
 import Logger from "@server/logging/Logger";
 import { trace } from "@server/logging/tracing";
 import Attachment from "@server/models/Attachment";
 import User from "@server/models/User";
 import FileStorage from "@server/storage/files";
+import type { APIContext } from "@server/types";

 export type HTMLOptions = {
  /** A title, if it should be included */
@@ -798,88 +801,14 @@ export class ProsemirrorHelper {
    };
  }

-  /**
-   * Convert HTML content directly to a Prosemirror document node.
-   *
-   * @param content The HTML content as a string or Buffer.
-   * @returns A Prosemirror Node representing the document.
-   */
-  public static htmlToProsemirror(content: Buffer | string): Node {
-    if (typeof content !== "string") {
-      content = content.toString("utf8");
-    }
-
-    const dom = new JSDOM(content);
-    const document = dom.window.document;
-
-    // Remove problematic elements before parsing
-    const elementsToRemove = document.querySelectorAll(
-      "script, style, title, head, meta, link"
-    );
-    elementsToRemove.forEach((el) => el.remove());
-
-    // Preprocess the DOM to handle cases that turndown plugins handled
-    this.preprocessHtmlForImport(document);
-
-    // Patch global environment for Prosemirror DOMParser
-    const cleanup = this.patchGlobalEnv(dom.window);
-
-    try {
-      const domParser = ProsemirrorDOMParser.fromSchema(schema);
-      return domParser.parse(document.body);
-    } finally {
-      cleanup();
-    }
-  }
-
-  /**
-   * Preprocesses HTML DOM before Prosemirror parsing to cleanup
-   * images and other elements.
-   *
-   * @param document The DOM document to preprocess.
-   */
-  private static preprocessHtmlForImport(document: Document): void {
-    // Handle images: filter emoticons, remove Jira icons, apply Confluence sizing
-    const images = document.querySelectorAll("img");
-    images.forEach((img) => {
-      const className = img.className || "";
-
-      // Skip emoticon images (they'll be dropped)
-      if (className.includes("emoticon")) {
-        img.remove();
-        return;
-      }
-
-      // Remove Jira icon images
-      if (
-        className === "icon" &&
-        img.parentElement?.className.includes("jira-issue-key")
-      ) {
-        img.remove();
-        return;
-      }
-
-      // Handle Confluence image sizing: data-width/data-height → width/height
-      const dataWidth = img.getAttribute("data-width");
-      const dataHeight = img.getAttribute("data-height");
-      const width = img.getAttribute("width");
-
-      if (dataWidth && dataHeight && width) {
-        const ratio = parseInt(dataWidth) / parseInt(width);
-        const calculatedHeight = Math.round(parseInt(dataHeight) / ratio);
-        img.setAttribute("height", String(calculatedHeight));
-      }
-    });
-  }
-
  /**
   * Patches the global environment with properties from the JSDOM window,
   * necessary for ProseMirror to run in a Node environment.
   *
-   * @param domWindow The JSDOM window object
-   * @returns A cleanup function to restore the global environment
+   * @param domWindow The JSDOM window object.
+   * @returns A cleanup function to restore the global environment.
   */
-  private static patchGlobalEnv(domWindow: JSDOM["window"]) {
+  public static patchGlobalEnv(domWindow: JSDOM["window"]) {
    const g = global as any;

    const globalParams = {
@@ -922,4 +851,109 @@ export class ProsemirrorHelper {
      });
    };
  }
+
+  /**
+   * Replaces remote and base64 encoded images in the given Prosemirror node
+   * with attachment urls and uploads the images to the storage provider.
+   *
+   * Both image and video nodes are considered. A node is left untouched when
+   * its `src` cannot be parsed by `new URL()`, when it is an internal URL, or
+   * when creating the attachment fails (the failure is logged as a warning).
+   * Each distinct `src` is uploaded at most once, however many nodes share it.
+   *
+   * @param ctx The API context.
+   * @param doc The Prosemirror node to process.
+   * @param user The user context.
+   * @returns A new Prosemirror node with images replaced, or the given node
+   * itself when no source was replaced.
+   */
+  static async replaceImagesWithAttachments(
+    ctx: APIContext,
+    doc: Node,
+    user: User
+  ): Promise<Node> {
+    const images = SharedProsemirrorHelper.getImages(doc);
+    const videos = SharedProsemirrorHelper.getVideos(doc);
+    const nodes = [...images, ...videos];
+
+    if (!nodes.length) {
+      return doc;
+    }
+
+    // Share the request budget between all media nodes, capped at 10s each.
+    const timeoutPerImage = Math.floor(
+      Math.min(env.REQUEST_TIMEOUT / nodes.length, 10000)
+    );
+
+    // Pick one representative node per distinct external source up front.
+    // De-duplicating inside the concurrent callbacks below is not sufficient:
+    // every callback in a chunk runs its check before any download in that
+    // chunk has completed, so a repeated source would be uploaded repeatedly.
+    const nodeBySrc: Map<string, Node> = new Map();
+
+    for (const node of nodes) {
+      const src = String(node.attrs.src ?? "");
+
+      // Skip already queued
+      if (nodeBySrc.has(src)) {
+        continue;
+      }
+
+      // Skip invalid URLs
+      try {
+        new URL(src);
+      } catch {
+        continue;
+      }
+
+      // Skip internal URLs
+      if (isInternalUrl(src)) {
+        continue;
+      }
+
+      nodeBySrc.set(src, node);
+    }
+
+    const urlToAttachment: Map<string, Attachment> = new Map();
+    const chunks = chunk(Array.from(nodeBySrc.entries()), 10);
+
+    // Download at most 10 sources concurrently.
+    for (const entryChunk of chunks) {
+      await Promise.all(
+        entryChunk.map(async ([src, node]) => {
+          try {
+            const attachment = await attachmentCreator({
+              name: String(node.attrs.alt ?? node.type.name),
+              url: src,
+              preset: AttachmentPreset.DocumentAttachment,
+              user,
+              fetchOptions: {
+                timeout: timeoutPerImage,
+              },
+              ctx,
+            });
+
+            if (attachment) {
+              urlToAttachment.set(src, attachment);
+            }
+          } catch (err) {
+            // Best effort: a failed download keeps the original src in place.
+            Logger.warn("Failed to download image for attachment", {
+              error: err instanceof Error ? err.message : String(err),
+              src,
+            });
+          }
+        })
+      );
+    }
+
+    // Nothing was uploaded, so there is nothing to rewrite.
+    if (!urlToAttachment.size) {
+      return doc;
+    }
+
+    // Transform the document to replace image/video src attributes
+    const transformFragment = (fragment: Fragment): Fragment => {
+      const transformedNodes: Node[] = [];
+
+      fragment.forEach((node) => {
+        if (node.type.name === "image" || node.type.name === "video") {
+          const src = String(node.attrs.src ?? "");
+          const attachment = urlToAttachment.get(src);
+
+          if (attachment) {
+            const json = node.toJSON();
+            json.attrs = { ...json.attrs, src: attachment.redirectUrl };
+            transformedNodes.push(Node.fromJSON(schema, json));
+          } else {
+            transformedNodes.push(node);
+          }
+        } else if (node.content.size > 0) {
+          // Recurse into container nodes, keeping their type, attrs and marks.
+          transformedNodes.push(node.copy(transformFragment(node.content)));
+        } else {
+          transformedNodes.push(node);
+        }
+      });
+
+      return Fragment.fromArray(transformedNodes);
+    };
+
+    return doc.copy(transformFragment(doc.content));
+  }
 }
@@ -1,12 +1,12 @@
 import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
 import { createContext } from "@server/context";
 import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
-import { TextHelper } from "./TextHelper";
+import { ProsemirrorHelper } from "./ProsemirrorHelper";

 jest.mock("@server/storage/files");

-describe("TextHelper", () => {
-  describe("replaceImagesWithAttachmentsInNode", () => {
+describe("ProsemirrorHelper", () => {
+  describe("replaceImagesWithAttachments", () => {
    it("should return the same document when there are no images", async () => {
      const user = await buildUser();
      const ctx = createContext({ user });
@@ -18,7 +18,7 @@ describe("TextHelper", () => {
        },
      ]);

-      const result = await TextHelper.replaceImagesWithAttachmentsInNode(
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
        ctx,
        doc,
        user
@@ -68,7 +68,7 @@ describe("TextHelper", () => {
        },
      ]);

-      const result = await TextHelper.replaceImagesWithAttachmentsInNode(
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
        ctx,
        doc,
        user
@@ -97,7 +97,7 @@ describe("TextHelper", () => {
        },
      ]);

-      const result = await TextHelper.replaceImagesWithAttachmentsInNode(
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
        ctx,
        doc,
        user
@@ -135,7 +135,7 @@ describe("TextHelper", () => {
        },
      ]);

-      const result = await TextHelper.replaceImagesWithAttachmentsInNode(
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
        ctx,
        doc,
        user
@@ -159,7 +159,7 @@ describe("TextHelper", () => {
        },
      ]);

-      const result = await TextHelper.replaceImagesWithAttachmentsInNode(
+      const result = await ProsemirrorHelper.replaceImagesWithAttachments(
        ctx,
        doc,
        user
@@ -1,10 +1,8 @@
 import chunk from "lodash/chunk";
 import escapeRegExp from "lodash/escapeRegExp";
-import { Fragment, Node } from "prosemirror-model";
 import { AttachmentPreset } from "@shared/types";
-import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
+import { isInternalUrl } from "@shared/utils/urls";
 import attachmentCreator from "@server/commands/attachmentCreator";
-import { schema } from "@server/editor";
 import env from "@server/env";
 import Logger from "@server/logging/Logger";
 import { trace } from "@server/logging/tracing";
@@ -14,7 +12,6 @@ import FileStorage from "@server/storage/files";
 import type { APIContext } from "@server/types";
 import parseAttachmentIds from "@server/utils/parseAttachmentIds";
 import parseImages from "@server/utils/parseImages";
-import { isInternalUrl } from "@shared/utils/urls";

@trace()
 export class TextHelper {
@@ -131,109 +128,4 @@ export class TextHelper {

    return output;
  }
-
-  /**
-   * Replaces remote and base64 encoded images in the given Prosemirror node
-   * with attachment urls and uploads the images to the storage provider.
-   *
-   * @param ctx The API context.
-   * @param doc The Prosemirror node to process.
-   * @param user The user context.
-   * @returns A new Prosemirror node with images replaced.
-   */
-  static async replaceImagesWithAttachmentsInNode(
-    ctx: APIContext,
-    doc: Node,
-    user: User
-  ): Promise<Node> {
-    const images = SharedProsemirrorHelper.getImages(doc);
-    const videos = SharedProsemirrorHelper.getVideos(doc);
-    const nodes = [...images, ...videos];
-
-    if (!nodes.length) {
-      return doc;
-    }
-
-    const timeoutPerImage = Math.floor(
-      Math.min(env.REQUEST_TIMEOUT / nodes.length, 10000)
-    );
-
-    const urlToAttachment: Map<string, Attachment> = new Map();
-    const chunks = chunk(nodes, 10);
-
-    for (const nodeChunk of chunks) {
-      await Promise.all(
-        nodeChunk.map(async (node) => {
-          const src = String(node.attrs.src ?? "");
-
-          // Skip invalid URLs
-          try {
-            new URL(src);
-          } catch {
-            return;
-          }
-
-          // Skip internal URLs
-          if (isInternalUrl(src)) {
-            return;
-          }
-
-          // Skip already processed
-          if (urlToAttachment.has(src)) {
-            return;
-          }
-
-          try {
-            const attachment = await attachmentCreator({
-              name: String(node.attrs.alt ?? node.type.name),
-              url: src,
-              preset: AttachmentPreset.DocumentAttachment,
-              user,
-              fetchOptions: {
-                timeout: timeoutPerImage,
-              },
-              ctx,
-            });
-
-            if (attachment) {
-              urlToAttachment.set(src, attachment);
-            }
-          } catch (err) {
-            Logger.warn("Failed to download image for attachment", {
-              error: err.message,
-              src,
-            });
-          }
-        })
-      );
-    }
-
-    // Transform the document to replace image/video src attributes
-    const transformFragment = (fragment: Fragment): Fragment => {
-      const transformedNodes: Node[] = [];
-
-      fragment.forEach((node) => {
-        if (node.type.name === "image" || node.type.name === "video") {
-          const src = String(node.attrs.src ?? "");
-          const attachment = urlToAttachment.get(src);
-
-          if (attachment) {
-            const json = node.toJSON();
-            json.attrs = { ...json.attrs, src: attachment.redirectUrl };
-            transformedNodes.push(Node.fromJSON(schema, json));
-          } else {
-            transformedNodes.push(node);
-          }
-        } else if (node.content.size > 0) {
-          transformedNodes.push(node.copy(transformFragment(node.content)));
-        } else {
-          transformedNodes.push(node);
-        }
-      });
-
-      return Fragment.fromArray(transformedNodes);
-    };
-
-    return doc.copy(transformFragment(doc.content));
-  }
 }
@@ -11,15 +11,6 @@ declare module "email-providers" {
  export default list;
 }

-declare module "@joplin/turndown-plugin-gfm" {
-  import { Plugin } from "turndown";
-
-  export const strikethrough: Plugin;
-  export const tables: Plugin;
-  export const taskListItems: Plugin;
-  export const gfm: Plugin;
-}
-
 declare module "ukkonen" {
  export default function ukkonen(
    first: string,
@@ -1,31 +1,291 @@
 import { DocumentConverter } from "./DocumentConverter";

-describe("csvToMarkdown", () => {
-  it("should convert csv to markdown with comma", async () => {
-    const csv = `name,age
+describe("DocumentConverter", () => {
+  describe("convert", () => {
+    describe("csv", () => {
+      it("should convert csv to markdown table", async () => {
+        const csv = `name,age
 John,25
 Jane,24`;

-    const markdown = `| name | age |
-| --- | --- |
-| John | 25 |
-| Jane | 24 |
-`;
+        const result = await DocumentConverter.convert(
+          csv,
+          "test.csv",
+          "text/csv"
+        );

-    expect(await DocumentConverter.csvToMarkdown(csv)).toEqual(markdown);
-  });
+        // CSV is converted to a markdown table
+        expect(result.text).toContain("| name | age |");
+        expect(result.text).toContain("John");
+        expect(result.text).toContain("Jane");
+        expect(result.title).toEqual("");
+      });

-  it("should convert csv to markdown with semicolon", async () => {
-    const csv = `name;age
+      it("should handle csv with semicolon delimiter", async () => {
+        const csv = `name;age
 John;25
 "Joan ""the bone"", Anne";24`;

-    const markdown = `| name | age |
-| --- | --- |
-| John | 25 |
-| Joan "the bone", Anne | 24 |
-`;
+        const result = await DocumentConverter.convert(
+          csv,
+          "test.csv",
+          "text/csv"
+        );

-    expect(await DocumentConverter.csvToMarkdown(csv)).toEqual(markdown);
+        expect(result.text).toContain("| name | age |");
+        expect(result.text).toContain("John");
+        expect(result.text).toContain('Joan "the bone", Anne');
+      });
+
+      it("should handle csv with title row before headers", async () => {
+        // Some financial exports have a title row before the actual headers
+        const csv = `"Report for Account"
+
+"Symbol","Name","Value",
+"ABC","Test Corp","$100",
+"XYZ","Other Inc","$200",`;
+
+        const result = await DocumentConverter.convert(
+          csv,
+          "test.csv",
+          "text/csv"
+        );
+
+        // The actual data headers should be used, not the title row
+        expect(result.text).toContain("| Symbol | Name | Value |");
+        expect(result.text).toContain("ABC");
+        expect(result.text).toContain("Test Corp");
+        expect(result.text).toContain("XYZ");
+      });
+
+      it("should handle csv with trailing comma on each line", async () => {
+        const csv = `name,age,city,
+John,25,NYC,
+Jane,24,LA,`;
+
+        const result = await DocumentConverter.convert(
+          csv,
+          "test.csv",
+          "text/csv"
+        );
+
+        expect(result.text).toContain("| name | age | city |");
+        expect(result.text).toContain("John");
+        expect(result.text).toContain("Jane");
+        // Should not have trailing empty column
+        expect(result.text).not.toContain("| city |  |");
+        expect(result.text).not.toContain("| city | |");
+      });
+
+      it("should preserve intentionally empty cells at end of rows", async () => {
+        const csv = `name,age,city
+John,25,NYC
+Jane,24,`;
+
+        const result = await DocumentConverter.convert(
+          csv,
+          "test.csv",
+          "text/csv"
+        );
+
+        expect(result.text).toContain("| name | age | city |");
+        expect(result.text).toContain("John");
+        expect(result.text).toContain("NYC");
+        // Jane's row should have 3 columns (empty city preserved)
+        expect(result.text).toMatch(/\| Jane \| 24\s*\|\s*\|/);
+      });
+    });
+
+    describe("html", () => {
+      it("should extract title from H1", async () => {
+        const html = "<h1>My Title</h1><p>Content here</p>";
+        const result = await DocumentConverter.convert(
+          html,
+          "test.html",
+          "text/html"
+        );
+
+        expect(result.title).toEqual("My Title");
+        expect(result.text).toContain("Content here");
+        expect(result.text).not.toContain("My Title");
+      });
+
+      it("should extract emoji from start", async () => {
+        const html = "<p>🚀 Launch content</p>";
+        const result = await DocumentConverter.convert(
+          html,
+          "test.html",
+          "text/html"
+        );
+
+        expect(result.icon).toEqual("🚀");
+        expect(result.text).not.toMatch(/^🚀/);
+      });
+    });
+
+    describe("markdown", () => {
+      it("should extract title from H1", async () => {
+        const md = "# My Title\n\nContent here";
+        const result = await DocumentConverter.convert(
+          md,
+          "test.md",
+          "text/markdown"
+        );
+
+        expect(result.title).toEqual("My Title");
+        expect(result.text).toContain("Content here");
+        expect(result.text).not.toContain("My Title");
+      });
+
+      it("should return empty title when no H1", async () => {
+        const md = "## Subtitle\n\nContent here";
+        const result = await DocumentConverter.convert(
+          md,
+          "test.md",
+          "text/markdown"
+        );
+
+        expect(result.title).toEqual("");
+        expect(result.text).toContain("Subtitle");
+      });
+    });
+  });
+
+  describe("htmlToProsemirror", () => {
+    it("should convert basic HTML to Prosemirror", () => {
+      const html = "<p>Hello world</p>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.type.name).toBe("doc");
+      expect(doc.content.childCount).toBe(1);
+      expect(doc.content.child(0).type.name).toBe("paragraph");
+      expect(doc.content.child(0).textContent).toBe("Hello world");
+    });
+
+    it("should convert HTML with heading", () => {
+      const html = "<h1>Title</h1><p>Content</p>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.content.childCount).toBe(2);
+      expect(doc.content.child(0).type.name).toBe("heading");
+      expect(doc.content.child(0).attrs.level).toBe(1);
+      expect(doc.content.child(0).textContent).toBe("Title");
+      expect(doc.content.child(1).type.name).toBe("paragraph");
+    });
+
+    it("should remove script tags", () => {
+      const html = "<p>Safe content</p><script>alert('xss')</script>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.textContent).toBe("Safe content");
+      expect(doc.textContent).not.toContain("alert");
+    });
+
+    it("should remove style tags", () => {
+      const html = "<style>body { color: red; }</style><p>Content</p>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.textContent).toBe("Content");
+      expect(doc.textContent).not.toContain("color");
+    });
+
+    it("should handle Buffer input", () => {
+      const html = Buffer.from("<p>From buffer</p>", "utf8");
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.content.child(0).textContent).toBe("From buffer");
+    });
+
+    it("should convert HTML with lists", () => {
+      const html = "<ul><li>Item 1</li><li>Item 2</li></ul>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.content.childCount).toBe(1);
+      expect(doc.content.child(0).type.name).toBe("bullet_list");
+      expect(doc.content.child(0).content.childCount).toBe(2);
+    });
+
+    it("should convert HTML with bold and italic", () => {
+      const html = "<p><strong>Bold</strong> and <em>italic</em></p>";
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      const paragraph = doc.content.child(0);
+      expect(paragraph.type.name).toBe("paragraph");
+
+      // Check that marks are applied
+      const boldText = paragraph.content.child(0);
+      expect(boldText.text).toBe("Bold");
+      expect(boldText.marks.some((m) => m.type.name === "strong")).toBe(true);
+
+      const italicText = paragraph.content.child(2);
+      expect(italicText.text).toBe("italic");
+      expect(italicText.marks.some((m) => m.type.name === "em")).toBe(true);
+    });
+
+    it("should handle full HTML document", () => {
+      const html = `
+        <!DOCTYPE html>
+        <html>
+          <head>
+            <title>Test</title>
+            <meta charset="utf-8">
+          </head>
+          <body>
+            <h1>Document Title</h1>
+            <p>Paragraph content</p>
+          </body>
+        </html>
+      `;
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.content.childCount).toBe(2);
+      expect(doc.content.child(0).type.name).toBe("heading");
+      expect(doc.content.child(0).textContent).toBe("Document Title");
+      expect(doc.content.child(1).type.name).toBe("paragraph");
+      expect(doc.content.child(1).textContent).toBe("Paragraph content");
+    });
+
+    it("should remove emoticon images", () => {
+      const html = `<p>Hello <img class="emoticon" src="smile.png" alt=":)"> world</p>`;
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      // Emoticon image should be removed, text content remains
+      expect(doc.textContent).not.toContain(":)");
+      expect(doc.textContent).toContain("Hello");
+      expect(doc.textContent).toContain("world");
+    });
+
+    it("should remove Jira icon images", () => {
+      const html = `
+        <p>Issue: <span class="jira-issue-key"><img class="icon" src="icon.png">ABC-123</span></p>
+      `;
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      expect(doc.textContent).toBe("Issue: ABC-123");
+    });
+
+    it("should apply Confluence image sizing", () => {
+      const html = `
+        <p><img src="image.png" data-width="800" data-height="600" width="400"></p>
+      `;
+
+      const doc = DocumentConverter.htmlToProsemirror(html);
+
+      const paragraph = doc.content.child(0);
+      const image = paragraph.content.child(0);
+      expect(image.type.name).toBe("image");
+      expect(image.attrs.width).toBe(400);
+      expect(image.attrs.height).toBe(300);
+    });
  });
 });
@@ -1,89 +1,79 @@
 import { parse } from "@fast-csv/parse";
+import { JSDOM } from "jsdom";
 import escapeRegExp from "lodash/escapeRegExp";
 import { simpleParser } from "mailparser";
 import mammoth from "mammoth";
 import type { Node } from "prosemirror-model";
+import { DOMParser as ProsemirrorDOMParser } from "prosemirror-model";
+import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
+import { schema, serializer } from "@server/editor";
 import { FileImportError } from "@server/errors";
 import { trace, traceFunction } from "@server/logging/tracing";
 import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
-import turndownService from "@server/utils/turndown";
+
+/**
+ * The outcome of converting an imported file, as returned by
+ * `DocumentConverter.convert`.
+ */
+export interface ConvertResult {
+  /**
+   * The document content as markdown text, serialized from `doc` with
+   * leading and trailing whitespace trimmed.
+   */
+  text: string;
+  /**
+   * The document content as Prosemirror, after the title heading and any
+   * leading emoji have been removed.
+   */
+  doc: Node;
+  /**
+   * The extracted title (from H1 heading if present). An empty string when
+   * the first heading in the document is not a level 1 heading.
+   */
+  title: string;
+  /**
+   * The extracted emoji/icon from start of document.
+   * NOTE(review): presumably undefined when no leading emoji is found —
+   * confirm against ProsemirrorHelper.extractEmojiFromStart.
+   */
+  icon?: string;
+}

@trace()
 export class DocumentConverter {
  /**
-   * Convert an incoming file to markdown.
+   * Convert an incoming file to a structured document result.
+   *
   * @param content The content of the file.
   * @param fileName The name of the file, including extension.
   * @param mimeType The mime type of the file.
-   * @returns The markdown representation of the file.
+   * @returns The converted document with text, data, title, and icon.
   */
-  public static async convertToMarkdown(
+  public static async convert(
    content: Buffer | string,
    fileName: string,
    mimeType: string
-  ) {
-    return (
-      await this.internalConvertToMarkdown(content, fileName, mimeType)
-    ).trim();
-  }
+  ): Promise<ConvertResult> {
+    let doc: Node;

-  private static async internalConvertToMarkdown(
-    content: Buffer | string,
-    fileName: string,
-    mimeType: string
-  ) {
-    // First try to convert the file based on the mime type.
-    switch (mimeType) {
-      case "application/msword":
-        return this.confluenceToMarkdown(content);
-      case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        return this.docXToMarkdown(content);
-      case "text/html":
-        return this.htmlToMarkdown(content);
-      case "text/plain":
-      case "text/markdown":
-        return this.fileToMarkdown(content);
-      case "text/csv":
-        return this.csvToMarkdown(content);
-      default:
-        break;
+    // Route to appropriate conversion method
+    const html = await this.convertToHtml(content, fileName, mimeType);
+    if (html !== undefined) {
+      doc = this.htmlToProsemirror(html);
+    } else {
+      const markdown = await this.convertToMarkdown(
+        content,
+        fileName,
+        mimeType
+      );
+      doc = ProsemirrorHelper.toProsemirror(markdown);
    }

-    // If the mime type doesn't work, try to convert based on the file extension.
-    const extension = fileName.split(".").pop();
-    switch (extension) {
-      case "docx":
-        return this.docXToMarkdown(content);
-      case "html":
-        return this.htmlToMarkdown(content);
-      case "md":
-      case "markdown":
-        return this.fileToMarkdown(content);
-      default:
-        throw FileImportError(`File type ${mimeType} not supported`);
-    }
-  }
-
-  public static async docXToMarkdown(content: Buffer | string) {
-    if (content instanceof Buffer) {
-      const { value } = await traceFunction({ spanName: "convertToHtml" })(
-        mammoth.convertToHtml
-      )({
-        buffer: content,
-      });
-
-      return turndownService.turndown(value);
+    // Extract title from first H1 heading
+    let title = "";
+    const headings = SharedProsemirrorHelper.getHeadings(doc);
+    if (headings.length > 0 && headings[0].level === 1) {
+      title = headings[0].title;
+      doc = ProsemirrorHelper.removeFirstHeading(doc);
    }

-    throw FileImportError("Unsupported Word file");
-  }
+    // Extract emoji from start of document
+    const { emoji: icon, doc: docWithoutEmoji } =
+      ProsemirrorHelper.extractEmojiFromStart(doc);
+    doc = docWithoutEmoji;

-  public static async htmlToMarkdown(content: Buffer | string) {
-    if (typeof content !== "string") {
-      content = content.toString("utf8");
-    }
+    // Serialize to markdown and trim whitespace
+    const text = serializer.serialize(doc).trim();

-    return turndownService.turndown(content);
+    return {
+      text,
+      doc,
+      title,
+      icon,
+    };
  }

  /**
@@ -93,19 +83,231 @@ export class DocumentConverter {
   * @returns A Prosemirror Node representing the document.
   */
  public static htmlToProsemirror(content: Buffer | string): Node {
-    return ProsemirrorHelper.htmlToProsemirror(content);
+    if (typeof content !== "string") {
+      content = content.toString("utf8");
+    }
+
+    const dom = new JSDOM(content);
+    const document = dom.window.document;
+
+    // Remove problematic elements before parsing
+    const elementsToRemove = document.querySelectorAll(
+      "script, style, title, head, meta, link"
+    );
+    elementsToRemove.forEach((el) => el.remove());
+
+    // Preprocess the DOM to handle edge cases
+    this.preprocessHtmlForImport(document);
+
+    // Patch global environment for Prosemirror DOMParser
+    const cleanup = ProsemirrorHelper.patchGlobalEnv(dom.window);
+
+    try {
+      const domParser = ProsemirrorDOMParser.fromSchema(schema);
+      return domParser.parse(document.body);
+    } finally {
+      cleanup();
+    }
  }

-  public static csvToMarkdown(content: Buffer | string): Promise<string> {
+  /**
+   * Preprocesses HTML DOM before Prosemirror parsing to cleanup
+   * images and other elements. The document is mutated in place.
+   *
+   * - Images whose class contains "emoticon" are removed.
+   * - Images with class "icon" whose parent element's class contains
+   *   "jira-issue-key" are removed.
+   * - Images carrying `data-width`, `data-height` and `width` attributes get a
+   *   `height` attribute scaled to match the displayed width.
+   *
+   * @param document The DOM document to preprocess.
+   */
+  private static preprocessHtmlForImport(document: Document): void {
+    // Handle images: filter emoticons, remove Jira icons, apply Confluence sizing
+    const images = document.querySelectorAll("img");
+    images.forEach((img) => {
+      const className = img.className || "";
+
+      // Skip emoticon images (they'll be dropped)
+      if (className.includes("emoticon")) {
+        img.remove();
+        return;
+      }
+
+      // Remove Jira icon images
+      if (
+        className === "icon" &&
+        img.parentElement?.className.includes("jira-issue-key")
+      ) {
+        img.remove();
+        return;
+      }
+
+      // Handle Confluence image sizing: data-width/data-height → width/height
+      const dataWidth = img.getAttribute("data-width");
+      const dataHeight = img.getAttribute("data-height");
+      const width = img.getAttribute("width");
+
+      if (dataWidth && dataHeight && width) {
+        const ratio = parseInt(dataWidth, 10) / parseInt(width, 10);
+        const calculatedHeight = Math.round(parseInt(dataHeight, 10) / ratio);
+
+        // Non-numeric or zero dimensions produce NaN/Infinity; writing those
+        // into the attribute would yield an unusable height, so leave the
+        // image untouched in that case.
+        if (Number.isFinite(calculatedHeight) && calculatedHeight > 0) {
+          img.setAttribute("height", String(calculatedHeight));
+        }
+      }
+    });
+  }
+
+  /**
+   * Attempts to convert content to HTML for formats that support it.
+   * Returns undefined for formats that should be parsed as markdown directly.
+   *
+   * The mime type is consulted first. When it is not one of the recognised
+   * HTML-producing types, the file extension is used instead and is compared
+   * case-insensitively so that names such as `REPORT.HTML` are recognised.
+   *
+   * NOTE(review): the extension fallback runs for any unrecognised mime type,
+   * including text/plain and text/markdown, so an upload named `*.html` with
+   * one of those mime types is parsed as HTML — confirm this precedence is
+   * intended.
+   *
+   * @param content The content of the file.
+   * @param fileName The name of the file, including extension.
+   * @param mimeType The mime type of the file.
+   * @returns HTML string if convertible, undefined otherwise.
+   * @throws FileImportError propagated from the Word converters when the
+   * content is not a supported Word file.
+   */
+  private static async convertToHtml(
+    content: Buffer | string,
+    fileName: string,
+    mimeType: string
+  ): Promise<string | undefined> {
+    // First try to convert based on the mime type
+    switch (mimeType) {
+      case "text/html":
+        return this.bufferToString(content);
+      case "application/msword":
+        return this.confluenceToHtml(content);
+      case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        return this.docxToHtml(content);
+      default:
+        break;
+    }
+
+    // Try to convert based on the file extension, ignoring case
+    const extension = fileName.split(".").pop()?.toLowerCase();
+    switch (extension) {
+      case "html":
+        return this.bufferToString(content);
+      case "docx":
+        return this.docxToHtml(content);
+      default:
+        return undefined;
+    }
+  }
+
+  /**
+   * Converts content to markdown for text-based formats.
+   *
+   * The mime type is consulted first; when it is not recognised the file
+   * extension is used instead and is compared case-insensitively so that
+   * names such as `README.MD` are accepted.
+   *
+   * @param content The content of the file.
+   * @param fileName The name of the file, including extension.
+   * @param mimeType The mime type of the file.
+   * @returns Markdown string.
+   * @throws FileImportError when neither the mime type nor the extension
+   * identifies a supported text format.
+   */
+  private static async convertToMarkdown(
+    content: Buffer | string,
+    fileName: string,
+    mimeType: string
+  ): Promise<string> {
+    switch (mimeType) {
+      case "text/plain":
+      case "text/markdown":
+        return this.bufferToString(content);
+      case "text/csv":
+        return this.csvToMarkdown(content);
+      default:
+        break;
+    }
+
+    // Fall back to the file extension, ignoring case
+    const extension = fileName.split(".").pop()?.toLowerCase();
+    switch (extension) {
+      case "md":
+      case "markdown":
+        return this.bufferToString(content);
+      default:
+        throw FileImportError(`File type ${mimeType} not supported`);
+    }
+  }
+
+  /**
+   * Render a docx file as HTML via mammoth, wrapped in a tracing span named
+   * "convertToHtml".
+   *
+   * @param content The docx file content; only Buffer input is accepted.
+   * @returns The HTML produced by mammoth for the document.
+   * @throws FileImportError when the content is not a Buffer.
+   */
+  private static async docxToHtml(content: Buffer | string): Promise<string> {
+    // mammoth is only ever handed binary data, reject anything else up front
+    if (!(content instanceof Buffer)) {
+      throw FileImportError("Unsupported Word file");
+    }
+
+    const tracedConvertToHtml = traceFunction({ spanName: "convertToHtml" })(
+      mammoth.convertToHtml
+    );
+    const result = await tracedConvertToHtml({ buffer: content });
+    return result.value;
+  }
+
+  /**
+   * Convert a Confluence Word export to HTML.
+   *
+   * Confluence "Word" exports are multi-part email messages. The HTML part is
+   * extracted and every reference to an attachment (matched by the last path
+   * segment of its content-location header) is replaced by a base64 data URI
+   * so that the returned HTML is self-contained.
+   *
+   * @param content The Confluence Word export content.
+   * @returns The HTML representation of the document.
+   * @throws FileImportError when the content is not a multipart/related
+   * message or contains no HTML part.
+   */
+  private static async confluenceToHtml(
+    content: Buffer | string
+  ): Promise<string> {
+    if (typeof content !== "string") {
+      content = content.toString("utf8");
+    }
+
+    // We're only supporting the output from Confluence here, regular Word documents should call
+    // into the docxToHtml importer. See: https://jira.atlassian.com/browse/CONFSERVER-38237
+    if (!content.includes("Content-Type: multipart/related")) {
+      throw FileImportError("Unsupported Word file");
+    }
+
+    // Confluence "Word" documents are actually just multi-part email messages, so we can use
+    // mailparser to parse the content.
+    const parsed = await simpleParser(content);
+    if (!parsed.html) {
+      throw FileImportError("Unsupported Word file (No content found)");
+    }
+
+    let html = parsed.html;
+
+    // Replace the content-location with a data URI for each attachment.
+    for (const attachment of parsed.attachments) {
+      const contentLocation = String(
+        attachment.headers.get("content-location") ?? ""
+      );
+
+      const id = contentLocation.split("/").pop();
+      if (!id) {
+        continue;
+      }
+
+      // Use the attachment's declared content type so that non-PNG images are
+      // labelled correctly, falling back to PNG when none is declared.
+      const mimeType = attachment.contentType || "image/png";
+      const dataUri = `data:${mimeType};base64,${attachment.content.toString(
+        "base64"
+      )}`;
+
+      // A replacer function is used so that "$" sequences in the replacement
+      // are never interpreted as special replacement patterns.
+      html = html.replace(new RegExp(escapeRegExp(id), "g"), () => dataUri);
+    }
+
+    return html;
+  }
+
+  /**
+   * Convert a CSV file to a markdown table.
+   *
+   * @param content The CSV file content.
+   * @returns A markdown table representation.
+   */
+  private static csvToMarkdown(content: Buffer | string): Promise<string> {
    return new Promise((resolve, reject) => {
-      const text = this.fileToMarkdown(content).trim();
-      const firstLine = text.split("\n")[0];
+      const text = this.bufferToString(content).trim();
+      const textLines = text.split("\n");
+
+      // Find the first non-empty line to determine the delimiter
+      const firstNonEmptyLine =
+        textLines.find((line) => line.trim().length > 0) || "";

      // Determine the separator used in the CSV file based on number of occurrences of each separator on first line
      const delimiter = [";", ",", "\t"].reduce(
        (acc, separator) => {
          const count = (
-            firstLine.match(new RegExp(escapeRegExp(separator), "g")) || []
+            firstNonEmptyLine.match(new RegExp(escapeRegExp(separator), "g")) ||
+            []
          ).length;
          return count > acc.count ? { count, separator } : acc;
        },
@@ -121,9 +323,64 @@ export class DocumentConverter {
        })
        .on("data", (row) => lines.push(row))
        .on("end", () => {
-          const headers = lines[0];
-          const table = lines
-            .slice(1)
+          // Filter out completely empty rows
+          const nonEmptyLines = lines.filter((row) =>
+            row.some((cell) => cell.trim() !== "")
+          );
+
+          if (nonEmptyLines.length === 0) {
+            resolve("");
+            return;
+          }
+
+          // Check if all rows have a trailing empty cell (trailing comma artifact)
+          // Only trim if ALL non-empty rows end with an empty cell
+          let trimmedLines = nonEmptyLines;
+          while (
+            trimmedLines.length > 0 &&
+            trimmedLines.every(
+              (row) => row.length > 0 && row[row.length - 1].trim() === ""
+            )
+          ) {
+            trimmedLines = trimmedLines.map((row) => row.slice(0, -1));
+          }
+
+          // Find the most common column count
+          const columnCounts = new Map<number, number>();
+          for (const row of trimmedLines) {
+            if (row.length > 0) {
+              columnCounts.set(
+                row.length,
+                (columnCounts.get(row.length) || 0) + 1
+              );
+            }
+          }
+
+          // Get the column count that appears most frequently
+          let expectedColumns = 0;
+          let maxFrequency = 0;
+          for (const [count, frequency] of columnCounts) {
+            if (frequency > maxFrequency) {
+              maxFrequency = frequency;
+              expectedColumns = count;
+            }
+          }
+
+          // Find the first row with the expected column count (this is the header)
+          const headerIndex = trimmedLines.findIndex(
+            (row) => row.length === expectedColumns
+          );
+          if (headerIndex === -1) {
+            resolve("");
+            return;
+          }
+
+          const headers = trimmedLines[headerIndex];
+          const dataRows = trimmedLines
+            .slice(headerIndex + 1)
+            .filter((row) => row.length === expectedColumns);
+
+          const table = dataRows
            .map((cells) => `| ${cells.join(" | ")} |`)
            .join("\n");

@@ -138,54 +395,13 @@ export class DocumentConverter {
    });
  }

-  public static fileToMarkdown(content: Buffer | string) {
-    if (typeof content !== "string") {
-      content = content.toString("utf8");
-    }
-    return content;
-  }
-
-  public static async confluenceToMarkdown(content: Buffer | string) {
-    if (typeof content !== "string") {
-      content = content.toString("utf8");
-    }
-
-    // We're only supporting the output from Confluence here, regular Word documents should call
-    // into the docxToMarkdown importer. See: https://jira.atlassian.com/browse/CONFSERVER-38237
-    if (!content.includes("Content-Type: multipart/related")) {
-      throw FileImportError("Unsupported Word file");
-    }
-
-    // Confluence "Word" documents are actually just multi-part email messages, so we can use
-    // mailparser to parse the content.
-    const parsed = await simpleParser(content);
-    if (!parsed.html) {
-      throw FileImportError("Unsupported Word file (No content found)");
-    }
-
-    // Replace the content-location with a data URI for each attachment.
-    for (const attachment of parsed.attachments) {
-      const contentLocation = String(
-        attachment.headers.get("content-location") ?? ""
-      );
-
-      const id = contentLocation.split("/").pop();
-      if (!id) {
-        continue;
-      }
-
-      parsed.html = parsed.html.replace(
-        new RegExp(escapeRegExp(id), "g"),
-        `data:image/png;base64,${attachment.content.toString("base64")}`
-      );
-    }
-
-    // If we don't remove the title here it becomes printed in the document
-    // body by turndown
-    turndownService.remove(["style", "title"]);
-
-    // Now we should have something that looks like HTML
-    const html = turndownService.turndown(parsed.html);
-    return html.replace(/<br>/g, " \\n ");
+  /**
+   * Normalise file content to a string, decoding Buffers as UTF-8 and
+   * passing strings through unchanged.
+   *
+   * @param content The content as a Buffer or string.
+   * @returns The content as a string.
+   */
+  private static bufferToString(content: Buffer | string): string {
+    if (typeof content === "string") {
+      return content;
+    }
+    return content.toString("utf8");
   }
 }
@@ -1,15 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin for converting break tags to newlines.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function breaks(turndownService: TurndownService) {
-  turndownService.addRule("breaks", {
-    filter: ["br"],
-    replacement() {
-      return "\\n";
-    },
-  });
-}
@@ -1,22 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin for unwrapping top-level empty list items.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function emptyLists(turndownService: TurndownService) {
-  turndownService.addRule("empty-lists", {
-    filter(node) {
-      return (
-        node.nodeName === "LI" &&
-        node.childNodes.length === 1 &&
-        (node.firstChild?.nodeName === "OL" ||
-          node.firstChild?.nodeName === "UL")
-      );
-    },
-    replacement(content) {
-      return content;
-    },
-  });
-}
@@ -1,22 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin for converting paragraphs with only breaks to newlines.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function emptyParagraphs(turndownService: TurndownService) {
-  turndownService.addRule("emptyParagraphs", {
-    filter(node) {
-      return (
-        node.nodeName === "P" &&
-        node.children.length === 1 &&
-        node.textContent?.trim() === "" &&
-        node.children[0].nodeName === "BR"
-      );
-    },
-    replacement() {
-      return "\n\n\\\n";
-    },
-  });
-}
@@ -1,21 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin to convert iframes to markdown links.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function images(turndownService: TurndownService) {
-  turndownService.addRule("frames", {
-    filter: "iframe",
-    replacement(content, node: HTMLIFrameElement) {
-      const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
-      const title = cleanAttribute(node.getAttribute("title") || "");
-      return src ? "[" + (title || src) + "]" + "(" + src + ")" : "";
-    },
-  });
-}
-
-function cleanAttribute(attribute: string) {
-  return attribute ? attribute.replace(/(\n+\s*)+/g, "\n") : "";
-}
@@ -1,50 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin overriding inbuilt image parsing behavior
- *
- * @param turndownService The TurndownService instance.
- */
-export default function images(turndownService: TurndownService) {
-  turndownService.addRule("image", {
-    filter(node) {
-      return node.nodeName === "IMG" && !node?.className.includes("emoticon");
-    },
-    replacement(content, node) {
-      if (!("className" in node)) {
-        return content;
-      }
-      const alt = cleanAttribute(node.getAttribute("alt") || "");
-      const src = cleanAttribute(node.getAttribute("src") || "");
-      const title = cleanAttribute(node.getAttribute("title") || "");
-
-      // Remove icons in issue keys as they will not resolve correctly and mess
-      // up the layout.
-      if (
-        node.className === "icon" &&
-        node.parentElement?.className.includes("jira-issue-key")
-      ) {
-        return "";
-      }
-
-      // Respect embedded Confluence image size
-      let size;
-      const naturalWidth = node.getAttribute("data-width");
-      const naturalHeight = node.getAttribute("data-height");
-      const width = node.getAttribute("width");
-
-      if (naturalWidth && naturalHeight && width) {
-        const ratio = parseInt(naturalWidth) / parseInt(width);
-        size = ` =${width}x${parseInt(naturalHeight) / ratio}`;
-      }
-
-      const titlePart = title || size ? ` "${title}${size}"` : "";
-
-      return src ? `![${alt}](${src}${titlePart})` : "";
-    },
-  });
-}
-
-function cleanAttribute(attribute: string) {
-  return (attribute ? attribute.replace(/\n+/g, "") : "").trim();
-}
@@ -1,47 +0,0 @@
-import { taskListItems, strikethrough } from "@joplin/turndown-plugin-gfm";
-import TurndownService from "turndown";
-import { escape } from "@shared/utils/markdown";
-import breaks from "./breaks";
-import emptyLists from "./emptyLists";
-import emptyParagraph from "./emptyParagraph";
-import frames from "./frames";
-import images from "./images";
-import inlineLink from "./inlineLink";
-import sanitizeLists from "./sanitizeLists";
-import sanitizeTables from "./sanitizeTables";
-import tables from "./tables";
-import underlines from "./underlines";
-import { inHtmlContext } from "./utils";
-
-/**
- * Turndown converts HTML to Markdown and is used in the importer code.
- *
- * For options, see: https://github.com/domchristie/turndown#options
- */
-const service = new TurndownService({
-  hr: "---",
-  bulletListMarker: "-",
-  headingStyle: "atx",
-  codeBlockStyle: "fenced",
-  blankReplacement: (_, node) =>
-    node.nodeName === "P" && !inHtmlContext(node as HTMLElement, "td, th")
-      ? "\n\n\\\n"
-      : "",
-})
-  .remove(["script", "style", "title", "head"])
-  .use(taskListItems)
-  .use(strikethrough)
-  .use(tables)
-  .use(inlineLink)
-  .use(emptyParagraph)
-  .use(sanitizeTables)
-  .use(sanitizeLists)
-  .use(underlines)
-  .use(frames)
-  .use(images)
-  .use(breaks)
-  .use(emptyLists);
-
-service.escape = escape;
-
-export default service;
@@ -1,22 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin for converting anchors to inline links without a title.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function underlines(turndownService: TurndownService) {
-  turndownService.addRule("inlineLink", {
-    filter(node, options) {
-      return !!(
-        options.linkStyle === "inlined" &&
-        node.nodeName === "A" &&
-        node.getAttribute("href")
-      );
-    },
-    replacement(content, node: HTMLElement) {
-      const href = node.getAttribute("href");
-      return "[" + content + "](" + href + ")";
-    },
-  });
-}
@@ -1,64 +0,0 @@
-import type TurndownService from "turndown";
-import { inHtmlContext } from "./utils";
-
-/**
- * A turndown plugin for removing incompatible nodes from lists.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function sanitizeLists(turndownService: TurndownService) {
-  // Fork of default functionality to only use a single space between marker and content
-  // See: https://github.com/mixmark-io/turndown/blob/cc73387fb707e5fb5e1083e94078d08f38f3abc8/src/commonmark-rules.js#L61
-  turndownService.addRule("listItem", {
-    filter: "li",
-
-    replacement(content, node, options) {
-      content = content
-        .replace(/^\n+/, "") // remove leading newlines
-        .replace(/\n+$/, "\n") // replace trailing newlines with just a single one
-        .replace(/\n/gm, "\n    "); // 4 space indent
-
-      let prefix = options.bulletListMarker + " ";
-      const parent = node.parentNode;
-      if (parent && parent.nodeName === "OL") {
-        const start = (parent as HTMLElement).getAttribute("start");
-        const index = Array.prototype.indexOf.call(parent.children, node);
-        prefix = (start ? Number(start) + index : index + 1) + ". ";
-      }
-      const output =
-        prefix +
-        content +
-        (node.nextSibling && !/\n$/.test(content) ? "\n" : "");
-      return output;
-    },
-  });
-
-  turndownService.addRule("headingsInLists", {
-    filter(node) {
-      return (
-        ["H1", "H2", "H3", "H4", "H5", "H6"].includes(node.nodeName) &&
-        inHtmlContext(node, "LI")
-      );
-    },
-    replacement(content, node, options) {
-      if (!content.trim()) {
-        return "";
-      }
-      return options.strongDelimiter + content + options.strongDelimiter;
-    },
-  });
-
-  turndownService.addRule("strongInHeadings", {
-    filter(node) {
-      return (
-        (node.nodeName === "STRONG" || node.nodeName === "B") &&
-        ["H1", "H2", "H3", "H4", "H5", "H6"].some((tag) =>
-          inHtmlContext(node, tag)
-        )
-      );
-    },
-    replacement(content) {
-      return content;
-    },
-  });
-}
@@ -1,30 +0,0 @@
-import type TurndownService from "turndown";
-import { inHtmlContext } from "./utils";
-
-/**
- * A turndown plugin for removing incompatible nodes from tables.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function sanitizeTables(turndownService: TurndownService) {
-  turndownService.addRule("headingsInTables", {
-    filter(node) {
-      return (
-        ["H1", "H2", "H3", "H4", "H5", "H6"].includes(node.nodeName) &&
-        inHtmlContext(node, "table")
-      );
-    },
-    replacement(content) {
-      return `**${content.trim()}**`;
-    },
-  });
-
-  turndownService.addRule("paragraphsInCells", {
-    filter(node) {
-      return node.nodeName === "P" && inHtmlContext(node, "table");
-    },
-    replacement(content, node) {
-      return content.trim() + (node.nextSibling ? "\\n" : "");
-    },
-  });
-}
@@ -1,325 +0,0 @@
-// Based on https://www.npmjs.com/package/joplin-turndown-plugin-gfm
-import type TurndownService from "turndown";
-import { inHtmlContext } from "./utils";
-
-const rules: Record<string, TurndownService.Rule> = {};
-const alignMap = { left: ":---", right: "---:", center: ":---:" };
-
-// Note use of WeakMap to enable garbage collection
-const tableShouldBeSkippedCache = new WeakMap<HTMLTableElement, boolean>();
-
-function getAlignment(node: HTMLElement) {
-  return node
-    ? ((
-        node.getAttribute("align") ||
-        node.style.textAlign ||
-        ""
-      ).toLowerCase() as "left" | "right" | "center")
-    : "";
-}
-
-function getBorder(alignment: keyof typeof alignMap) {
-  return alignment ? alignMap[alignment] : "---";
-}
-
-function getColumnAlignment(
-  table: HTMLTableElement | null,
-  columnIndex: number
-) {
-  const votes = {
-    left: 0,
-    right: 0,
-    center: 0,
-    "": 0,
-  };
-
-  let align: keyof typeof alignMap = "left";
-  if (!table) {
-    return align;
-  }
-
-  // Reference is important as .rows is an expensive getter.
-  const rows = table.rows;
-
-  for (let i = 0; i < rows.length; ++i) {
-    const row = rows[i];
-    if (columnIndex < row.childNodes.length) {
-      const cellAlignment = getAlignment(
-        row.childNodes[columnIndex] as HTMLElement
-      );
-      ++votes[cellAlignment];
-
-      if (
-        votes[cellAlignment] > votes[align] &&
-        Object.keys(alignMap).includes(cellAlignment)
-      ) {
-        align = cellAlignment as keyof typeof alignMap;
-      }
-    }
-  }
-
-  return align;
-}
-
-rules.tableCell = {
-  filter: ["th", "td"],
-  replacement(content, node: HTMLTableCellElement) {
-    if (tableShouldBeSkipped(nodeParentTable(node))) {
-      return content;
-    }
-    return cell(content, node);
-  },
-};
-
-rules.tableRow = {
-  filter: "tr",
-  replacement(content, node: HTMLTableRowElement) {
-    const parentTable = nodeParentTable(node);
-    if (tableShouldBeSkipped(parentTable)) {
-      return content;
-    }
-
-    let borderCells = "";
-
-    if (isHeadingRow(node)) {
-      const colCount = tableColCount(parentTable);
-      for (let i = 0; i < colCount; i++) {
-        const childNode =
-          i < node.childNodes.length ? node.childNodes[i] : null;
-        const border = getBorder(getColumnAlignment(parentTable, i));
-        borderCells += cell(border, childNode, i);
-      }
-    }
-    return "\n" + content + (borderCells ? "\n" + borderCells : "");
-  },
-};
-
-rules.table = {
-  // Only convert tables that can result in valid Markdown
-  // Other tables are kept as HTML using `keep` (see below).
-  filter(node) {
-    return node.nodeName === "TABLE" && !tableShouldBeHtml(node);
-  },
-
-  replacement(content, node: HTMLTableElement) {
-    if (tableShouldBeSkipped(node)) {
-      return content;
-    }
-
-    // Ensure there are no blank lines
-    content = content.replace(/\n+/g, "\n");
-
-    // If table has no heading, add an empty one so as to get a valid Markdown table
-    const secondLineParts = content.trim().split("\n");
-    let secondLine = "";
-    if (secondLineParts.length >= 2) {
-      secondLine = secondLineParts[1];
-    }
-    const secondLineIsDivider = /\| :?---/.test(secondLine);
-
-    const columnCount = tableColCount(node);
-    let emptyHeader = "";
-    if (columnCount && !secondLineIsDivider) {
-      emptyHeader = "|" + "     |".repeat(columnCount) + "\n" + "|";
-      for (let columnIndex = 0; columnIndex < columnCount; ++columnIndex) {
-        emptyHeader +=
-          " " + getBorder(getColumnAlignment(node, columnIndex)) + " |";
-      }
-    }
-
-    return "\n\n" + emptyHeader + content + "\n\n";
-  },
-};
-
-rules.tableSection = {
-  filter: ["thead", "tbody", "tfoot"],
-  replacement(content) {
-    return content;
-  },
-};
-
-/**
- * A tr is a heading row if the parent is a THEAD or its the first child of the TABLE or the first
- * TBODY (possibly following a blank THEAD) and every cell is a TH.
- *
- * @param tr The tr node to check
- * @returns Whether the tr is a heading row
- */
-function isHeadingRow(tr: Node) {
-  const parentNode = tr.parentNode;
-  if (!parentNode) {
-    return false;
-  }
-
-  return (
-    parentNode.nodeName === "THEAD" ||
-    Array.from(tr.childNodes).every((n) => n.nodeName === "TH")
-  );
-}
-
-function cell(
-  content: string,
-  node: ChildNode | null = null,
-  index: number | null = null
-) {
-  if (index === null && node) {
-    index = Array.from(node?.parentNode?.childNodes ?? []).indexOf(node);
-  }
-  let prefix = " ";
-  if (index === 0) {
-    prefix = "| ";
-  }
-  let filteredContent = content
-    .trim()
-    .replace(/\n\r/g, "<br>")
-    .replace(/\n/g, "<br>");
-  filteredContent = filteredContent.replace(/\|+/g, "\\|");
-  while (filteredContent.length < 3) {
-    filteredContent += " ";
-  }
-  if (node) {
-    filteredContent = handleColSpan(filteredContent, node, " ");
-  }
-  return prefix + filteredContent + " |";
-}
-
-function nodeContainsTable(node: Node) {
-  if (!node?.childNodes) {
-    return false;
-  }
-
-  for (let i = 0; i < node.childNodes.length; i++) {
-    const child = node.childNodes[i];
-    if (child.nodeName === "TABLE") {
-      return true;
-    }
-    if (nodeContainsTable(child)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-const nodeContains = (node: HTMLElement, types: string | string[]) => {
-  if (!node?.childNodes) {
-    return false;
-  }
-
-  for (let i = 0; i < node.childNodes.length; i++) {
-    const child = node.childNodes[i] as HTMLElement;
-    if (types === "code" && inHtmlContext(child, "CODE")) {
-      return true;
-    }
-    if (types.includes(child.nodeName)) {
-      return true;
-    }
-    if (nodeContains(child, types)) {
-      return true;
-    }
-  }
-
-  return false;
-};
-
-const tableShouldBeHtml = (tableNode: HTMLElement) =>
-  nodeContains(tableNode, ["code", "table"]);
-
-// Various conditions under which a table should be skipped - i.e. each cell
-// will be rendered one after the other as if they were paragraphs.
-function tableShouldBeSkipped(tableNode: HTMLTableElement | null) {
-  if (!tableNode) {
-    return true;
-  }
-
-  const cached = tableShouldBeSkippedCache.get(tableNode);
-  if (cached !== undefined) {
-    return cached;
-  }
-
-  const process = () => {
-    if (!tableNode) {
-      return true;
-    }
-
-    // Reference is important as .rows is an expensive getter.
-    const rows = tableNode.rows;
-
-    if (!rows) {
-      return true;
-    }
-    if (rows.length === 1 && rows[0].childNodes.length <= 1) {
-      return true;
-    }
-    if (nodeContainsTable(tableNode)) {
-      return true;
-    }
-    return false;
-  };
-
-  const result = process();
-  tableShouldBeSkippedCache.set(tableNode, result);
-  return result;
-}
-
-function nodeParentTable(
-  node: HTMLTableCellElement | HTMLTableRowElement
-): HTMLTableElement | null {
-  let parent = node.parentNode;
-  if (!parent) {
-    return null;
-  }
-
-  while (parent.nodeName !== "TABLE") {
-    parent = parent.parentNode;
-    if (!parent) {
-      return null;
-    }
-  }
-
-  return parent as HTMLTableElement;
-}
-
-function handleColSpan(content: string, node: ChildNode, emptyChar: string) {
-  if (!node) {
-    return content;
-  }
-
-  const colspan = Number((node as HTMLElement).getAttribute("colspan") || 1);
-  for (let i = 1; i < colspan; i++) {
-    content += " | " + emptyChar.repeat(3);
-  }
-  return content;
-}
-
-function tableColCount(node: HTMLTableElement | null) {
-  if (!node) {
-    return 0;
-  }
-
-  let maxColCount = 0;
-
-  // Reference is important as .rows is an expensive getter.
-  const rows = node.rows;
-
-  for (let i = 0; i < rows.length; i++) {
-    const row = rows[i];
-    const colCount = row.childNodes.length;
-    if (colCount > maxColCount) {
-      maxColCount = colCount;
-    }
-  }
-  return maxColCount;
-}
-
-export default function tables(turndownService: TurndownService) {
-  turndownService.keep(function (node) {
-    if (node.nodeName === "TABLE" && tableShouldBeHtml(node)) {
-      return true;
-    }
-    return false;
-  });
-
-  for (const key in rules) {
-    turndownService.addRule(key, rules[key]);
-  }
-}
@@ -1,15 +0,0 @@
-import type TurndownService from "turndown";
-
-/**
- * A turndown plugin for converting u tags to underlines.
- *
- * @param turndownService The TurndownService instance.
- */
-export default function underlines(turndownService: TurndownService) {
-  turndownService.addRule("underlines", {
-    filter: ["u"],
-    replacement(content) {
-      return `__${content.trim()}__`;
-    },
-  });
-}
@@ -1,13 +0,0 @@
-export function inHtmlContext(node: HTMLElement, selector: string) {
-  let currentNode = node;
-  // start at the closest element
-  while (currentNode !== null && currentNode.nodeType !== 1) {
-    currentNode = (currentNode.parentElement ||
-      currentNode.parentNode) as HTMLElement;
-  }
-  return (
-    currentNode !== null &&
-    currentNode.nodeType === 1 &&
-    currentNode.closest(selector) !== null
-  );
-}
@@ -4079,13 +4079,6 @@ __metadata:
  languageName: node
  linkType: hard

-"@joplin/turndown-plugin-gfm@npm:^1.0.49":
-  version: 1.0.64
-  resolution: "@joplin/turndown-plugin-gfm@npm:1.0.64"
-  checksum: 10c0/cbbcba0f111e420535fc1693c1ff859ca7fae5a869a7891ec6cd9ead2f94cec8e858938dcb6ba379c160d086a7fbfadadd53cc9e79d755100a4d0b1cf77947fc
-  languageName: node
-  linkType: hard
-
 "@jridgewell/gen-mapping@npm:^0.3.12, @jridgewell/gen-mapping@npm:^0.3.5":
  version: 0.3.13
  resolution: "@jridgewell/gen-mapping@npm:0.3.13"
@@ -4368,13 +4361,6 @@ __metadata:
  languageName: node
  linkType: hard

-"@mixmark-io/domino@npm:^2.2.0":
-  version: 2.2.0
-  resolution: "@mixmark-io/domino@npm:2.2.0"
-  checksum: 10c0/aa468a15f9217d425220fe6a4b3f9416cbe8e566ee14efc191c6d5cc04fe39338b16a90bbac190f28d44e69465db5f2cf95f479c621ce38060ca6b2a3d346e9d
-  languageName: node
-  linkType: hard
-
 "@msgpackr-extract/msgpackr-extract-darwin-arm64@npm:3.0.3":
  version: 3.0.3
  resolution: "@msgpackr-extract/msgpackr-extract-darwin-arm64@npm:3.0.3"
@@ -8774,13 +8760,6 @@ __metadata:
  languageName: node
  linkType: hard

-"@types/turndown@npm:^5.0.6":
-  version: 5.0.6
-  resolution: "@types/turndown@npm:5.0.6"
-  checksum: 10c0/cc5648c115b67ba413782fd0a8ae273ad6b87940df770ab9a5fefe0303c368704013fca2a55dd08f46a2132a747912fd47f96a83162c47fd189babf1352ac4be
-  languageName: node
-  linkType: hard
-
 "@types/unist@npm:^2":
  version: 2.0.11
  resolution: "@types/unist@npm:2.0.11"
@@ -17313,7 +17292,6 @@ __metadata:
    "@hocuspocus/extension-throttle": "npm:1.1.2"
    "@hocuspocus/provider": "npm:1.1.2"
    "@hocuspocus/server": "npm:1.1.2"
-    "@joplin/turndown-plugin-gfm": "npm:^1.0.49"
    "@juggle/resize-observer": "npm:^3.4.0"
    "@linear/sdk": "npm:^58.1.0"
    "@node-oauth/oauth2-server": "npm:^5.2.0"
@@ -17404,7 +17382,6 @@ __metadata:
    "@types/styled-components": "npm:^5.1.32"
    "@types/throng": "npm:^5.0.7"
    "@types/tmp": "npm:^0.2.6"
-    "@types/turndown": "npm:^5.0.6"
    "@types/utf8": "npm:^3.0.3"
    "@types/validator": "npm:^13.15.3"
    "@types/yauzl": "npm:^2.10.3"
@@ -17580,7 +17557,6 @@ __metadata:
    tiny-cookie: "npm:^2.5.1"
    tmp: "npm:^0.2.5"
    tunnel-agent: "npm:^0.6.0"
-    turndown: "npm:^7.2.2"
    typescript: "npm:^5.9.2"
    ukkonen: "npm:^2.2.0"
    umzug: "npm:^3.8.2"
@@ -21354,15 +21330,6 @@ __metadata:
  languageName: node
  linkType: hard

-"turndown@npm:^7.2.2":
-  version: 7.2.2
-  resolution: "turndown@npm:7.2.2"
-  dependencies:
-    "@mixmark-io/domino": "npm:^2.2.0"
-  checksum: 10c0/ee09f7bd67c468505aad6c3a26b11269ca49ffce07eaa9c212926d068f242b11b4e955b31a58289f26674ff29f91209b29454907551dcaec7da712e524cc78c2
-  languageName: node
-  linkType: hard
-
 "type-detect@npm:4.0.8":
  version: 4.0.8
  resolution: "type-detect@npm:4.0.8"