chore: Refactor Markdown importer to use new import pipeline (#12361)

* chore: Refactor Markdown importer to use new import pipeline --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-13 03:14:59 +03:00 · 2026-05-16 14:10:15 -04:00
parent 170f59c6ba
commit 82d7041b6b
20 changed files with 1224 additions and 508 deletions
@@ -6,7 +6,12 @@ import { useTranslation } from "react-i18next";
 import { toast } from "sonner";
 import styled from "styled-components";
 import { s } from "@shared/styles";
-import { AttachmentPreset, CollectionPermission } from "@shared/types";
+import {
+  AttachmentPreset,
+  CollectionPermission,
+  FileOperationFormat,
+  IntegrationService,
+} from "@shared/types";
 import { bytesToHumanReadable } from "@shared/utils/files";
 import Button from "~/components/Button";
 import Flex from "~/components/Flex";
@@ -27,7 +32,7 @@ type Props = {

 function DropToImport({ disabled, onSubmit, children, format }: Props) {
  const { t } = useTranslation();
-  const { collections } = useStores();
+  const { collections, imports } = useStores();
  const [file, setFile] = useState<File | null>(null);
  const [isImporting, setImporting] = useState(false);
  const [permission, setPermission] = useState<CollectionPermission | null>(
@@ -53,7 +58,19 @@ function DropToImport({ disabled, onSubmit, children, format }: Props) {
        name: file.name,
        preset: AttachmentPreset.WorkspaceImport,
      });
-      await collections.import(attachment.id, { format, permission });
+
+      if (format === FileOperationFormat.MarkdownZip) {
+        await imports.create(
+          { service: IntegrationService.Markdown },
+          {
+            attachmentId: attachment.id,
+            permission: permission ?? undefined,
+          }
+        );
+      } else {
+        await collections.import(attachment.id, { format, permission });
+      }
+
      onSubmit();
      toast.message(file.name, {
        description: t(
@@ -30,6 +30,10 @@ export class NotionImportsProcessor extends ImportsProcessor<IntegrationService.
    importModel: Import<IntegrationService.Notion>,
    transaction: Transaction
  ): Promise<NotionImportTaskInput> {
+    if (!importModel.integrationId) {
+      throw new Error("Notion import is missing integrationId");
+    }
+
    const integration = await Integration.scope("withAuthentication").findByPk(
      importModel.integrationId,
      { rejectOnEmpty: true }
@@ -29,15 +29,19 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
  ];

  /**
-   * Process the Notion import task.
+   * Process a Notion page-phase import task.
   * This fetches data from Notion and converts it to task output.
   *
   * @param importTask ImportTask model to process.
   * @returns Promise with output that resolves once processing has completed.
   */
-  protected async process(
+  protected async processPage(
    importTask: ImportTask<IntegrationService.Notion>
  ): Promise<ProcessOutput<IntegrationService.Notion>> {
+    if (!importTask.import.integrationId) {
+      throw new Error("Notion import is missing integrationId");
+    }
+
    const integration = await Integration.scope("withAuthentication").findByPk(
      importTask.import.integrationId,
      { rejectOnEmpty: true }
@@ -47,7 +51,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic

    const parsedPages: (ParsePageOutput | null)[] = [];
    for (const item of importTask.input) {
-      parsedPages.push(await this.processPage({ item, client }));
+      parsedPages.push(await this.parsePage({ item, client }));
    }

    // Filter out any null results (from pages/databases that couldn't be accessed)
@@ -56,7 +60,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
    const taskOutput: ImportTaskOutput = validParsedPages.map((parsedPage) => ({
      externalId: parsedPage.externalId,
      title: parsedPage.title,
-      emoji: parsedPage.emoji,
+      icon: parsedPage.icon,
      content: parsedPage.content,
      author: parsedPage.author,
      createdAt: parsedPage.createdAt,
@@ -96,7 +100,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
   * @param client Notion client.
   * @returns Promise of parsed page output that resolves when the task is scheduled.
   */
-  private async processPage({
+  private async parsePage({
    item,
    client,
  }: {
@@ -112,13 +116,14 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
    try {
      // Convert Notion database to an empty page with "pages in database" as its children.
      if (item.type === PageType.Database) {
-        const { pages, ...databaseInfo } = await client.fetchDatabase(
+        const { pages, emoji, ...databaseInfo } = await client.fetchDatabase(
          item.externalId,
          { titleMaxLength }
        );

        return {
          ...databaseInfo,
+          icon: emoji,
          externalId: item.externalId,
          content: ProsemirrorHelper.getEmptyDocument() as ProsemirrorDoc,
          collectionExternalId,
@@ -129,12 +134,14 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
        };
      }

-      const { blocks, ...pageInfo } = await client.fetchPage(item.externalId, {
-        titleMaxLength,
-      });
+      const { blocks, emoji, ...pageInfo } = await client.fetchPage(
+        item.externalId,
+        { titleMaxLength }
+      );

      return {
        ...pageInfo,
+        icon: emoji,
        externalId: item.externalId,
        content: NotionConverter.page({ children: blocks } as NotionPage),
        collectionExternalId,
@@ -0,0 +1,16 @@
+"use strict";
+
+/** @type {import('sequelize-cli').Migration} */
+module.exports = {
+  async up(queryInterface) {
+    await queryInterface.sequelize.query(
+      `ALTER TABLE "imports" ALTER COLUMN "integrationId" DROP NOT NULL`
+    );
+  },
+
+  async down(queryInterface) {
+    await queryInterface.sequelize.query(
+      `ALTER TABLE "imports" ALTER COLUMN "integrationId" SET NOT NULL`
+    );
+  },
+};
@@ -0,0 +1,21 @@
+"use strict";
+
+/** @type {import('sequelize-cli').Migration} */
+module.exports = {
+  async up(queryInterface, Sequelize) {
+    await queryInterface.addColumn("import_tasks", "phase", {
+      type: Sequelize.STRING,
+      allowNull: false,
+      defaultValue: "page",
+    });
+    await queryInterface.addColumn("imports", "scratch", {
+      type: Sequelize.JSONB,
+      allowNull: true,
+    });
+  },
+
+  async down(queryInterface) {
+    await queryInterface.removeColumn("imports", "scratch");
+    await queryInterface.removeColumn("import_tasks", "phase");
+  },
+};
@@ -1,5 +1,6 @@
 import type { InferAttributes, InferCreationAttributes } from "sequelize";
 import {
+  AllowNull,
  BelongsTo,
  Column,
  DataType,
@@ -10,7 +11,7 @@ import {
  IsNumeric,
  Table,
 } from "sequelize-typescript";
-import { type ImportInput } from "@shared/schema";
+import { type ImportInput, type ImportScratch } from "@shared/schema";
 import { ImportableIntegrationService, ImportState } from "@shared/types";
 import { ImportValidation } from "@shared/validations";
 import Integration from "./Integration";
@@ -55,6 +56,10 @@ class Import<T extends ImportableIntegrationService> extends ParanoidModel<
  @Column(DataType.JSONB)
  input: ImportInput<T>;

+  @AllowNull
+  @Column(DataType.JSONB)
+  scratch: ImportScratch<T> | null;
+
  @IsNumeric
  @Default(0)
  @Column(DataType.INTEGER)
@@ -66,11 +71,12 @@ class Import<T extends ImportableIntegrationService> extends ParanoidModel<
  // associations

  @BelongsTo(() => Integration, "integrationId")
-  integration: Integration;
+  integration: Integration | null;

+  @AllowNull
  @ForeignKey(() => Integration)
  @Column(DataType.UUID)
-  integrationId: string;
+  integrationId: string | null;

  @BelongsTo(() => User, "createdById")
  createdBy: User;
@@ -11,7 +11,7 @@ import {
 import type { ImportTaskOutput } from "@shared/schema";
 import { type ImportTaskInput } from "@shared/schema";
 import type { ImportableIntegrationService } from "@shared/types";
-import { ImportTaskState } from "@shared/types";
+import { ImportTaskPhase, ImportTaskState } from "@shared/types";
 import Import from "./Import";
 import IdModel from "./base/IdModel";
 import Fix from "./decorators/Fix";
@@ -40,6 +40,10 @@ class ImportTask<T extends ImportableIntegrationService> extends IdModel<
  @Column(DataType.STRING)
  state: ImportTaskState;

+  @IsIn([Object.values(ImportTaskPhase)])
+  @Column(DataType.STRING)
+  phase: ImportTaskPhase;
+
  @Column(DataType.JSONB)
  input: ImportTaskInput<T>;

@@ -5,7 +5,6 @@ import ExportHTMLZipTask from "../tasks/ExportHTMLZipTask";
 import ExportJSONTask from "../tasks/ExportJSONTask";
 import ExportMarkdownZipTask from "../tasks/ExportMarkdownZipTask";
 import ImportJSONTask from "../tasks/ImportJSONTask";
-import ImportMarkdownZipTask from "../tasks/ImportMarkdownZipTask";
 import BaseProcessor from "./BaseProcessor";

 export default class FileOperationCreatedProcessor extends BaseProcessor {
@@ -19,14 +18,11 @@ export default class FileOperationCreatedProcessor extends BaseProcessor {
      }
    );

-    // map file operation type and format to the appropriate task
+    // map file operation type and format to the appropriate task. Markdown
+    // zip imports flow through the API-import pipeline (`imports.create` →
+    // MarkdownAPIImportTask) and never reach this dispatcher.
    if (fileOperation.type === FileOperationType.Import) {
      switch (fileOperation.format) {
-        case FileOperationFormat.MarkdownZip:
-          await new ImportMarkdownZipTask().schedule({
-            fileOperationId: event.modelId,
-          });
-          break;
        case FileOperationFormat.JSON:
          await new ImportJSONTask().schedule({
            fileOperationId: event.modelId,
@@ -5,14 +5,24 @@ import type { CreateOptions, CreationAttributes, Transaction } from "sequelize";
 import { UniqueConstraintError } from "sequelize";
 import { randomUUID } from "node:crypto";
 import { randomElement } from "@shared/random";
-import type { ImportInput, ImportTaskInput } from "@shared/schema";
+import type {
+  BaseImportInput,
+  BaseImportTaskInput,
+  ImportInput,
+  ImportTaskInput,
+} from "@shared/schema";
 import type {
  ImportableIntegrationService,
  ProsemirrorData,
  ProsemirrorDoc,
  SourceMetadata,
 } from "@shared/types";
-import { ImportState, ImportTaskState, MentionType } from "@shared/types";
+import {
+  ImportState,
+  ImportTaskPhase,
+  ImportTaskState,
+  MentionType,
+} from "@shared/types";
 import { colorPalette } from "@shared/utils/collections";
 import { CollectionValidation } from "@shared/validations";
 import { createContext } from "@server/context";
@@ -118,22 +128,26 @@ export default abstract class ImportsProcessor<
    }

    const tasksInput = await this.buildTasksInput(importModel, transaction);
+    const phase = this.getInitialPhase();

    const importTasks = await Promise.all(
-      chunk(tasksInput, PagePerImportTask).map((input) => {
-        const attrs = {
-          state: ImportTaskState.Created,
-          input,
-          importId: importModel.id,
-        } as ImportTaskCreationAttributes<T>;
+      chunk(tasksInput as BaseImportTaskInput, PagePerImportTask).map(
+        (input) => {
+          const attrs = {
+            state: ImportTaskState.Created,
+            phase,
+            input,
+            importId: importModel.id,
+          } as ImportTaskCreationAttributes<T>;

-        return ImportTask.create<
-          ImportTask<T>,
-          CreateOptions<ImportTaskAttributes<T>>
-        >(attrs as unknown as CreationAttributes<ImportTask<T>>, {
-          transaction,
-        });
-      })
+          return ImportTask.create<
+            ImportTask<T>,
+            CreateOptions<ImportTaskAttributes<T>>
+          >(attrs as unknown as CreationAttributes<ImportTask<T>>, {
+            transaction,
+          });
+        }
+      )
    );

    importModel.state = ImportState.InProgress;
@@ -271,8 +285,12 @@ export default abstract class ImportsProcessor<
    const createdCollections: Collection[] = [];
    // External id to internal model id.
    const idMap: Record<string, string> = {};
-    // These will be imported as collections.
-    const importInput = keyBy(importModel.input, "externalId");
+    // These will be imported as collections. Widened to the base input shape
+    // because the abstract class has no narrowed view of T.
+    const importInput = keyBy(
+      importModel.input as BaseImportInput,
+      "externalId"
+    );
    const ctx = createContext({ user: importModel.createdBy, transaction });

    const firstCollection = await Collection.findFirstCollectionForUser(
@@ -361,8 +379,8 @@ export default abstract class ImportsProcessor<
              const collection = Collection.build({
                id: internalId,
                name: output.title,
-                icon: output.emoji ?? "collection",
-                color: output.emoji ? undefined : randomElement(colorPalette),
+                icon: output.icon ?? "collection",
+                color: output.icon ? undefined : randomElement(colorPalette),
                content: transformedContent,
                description: truncate(description, {
                  length: CollectionValidation.maxDescriptionLength,
@@ -403,7 +421,7 @@ export default abstract class ImportsProcessor<

            const defaults = {
              title: output.title,
-              icon: output.emoji,
+              icon: output.icon,
              content: transformedContent,
              text: await DocumentHelper.toMarkdown(transformedContent, {
                includeTitle: false,
@@ -602,6 +620,18 @@ export default abstract class ImportsProcessor<
   */
  protected abstract canProcess(importModel: Import<T>): boolean;

+  /**
+   * Phase assigned to the initial ImportTask rows created from
+   * `buildTasksInput`. Sources that begin with a bootstrap step (e.g.
+   * Markdown zip extraction) override this to return `Bootstrap`. Sources
+   * that fan out directly into page work (e.g. Notion) leave the default.
+   *
+   * @returns Phase for the first wave of ImportTask rows.
+   */
+  protected getInitialPhase(): ImportTaskPhase {
+    return ImportTaskPhase.Page;
+  }
+
  /**
   * Build task inputs which will be used for `APIImportTask`s.
   *
@@ -0,0 +1,37 @@
+import type { Transaction } from "sequelize";
+import type { ImportTaskInput } from "@shared/schema";
+import { ImportTaskPhase, IntegrationService } from "@shared/types";
+import type { Import, ImportTask } from "@server/models";
+import MarkdownAPIImportTask from "../tasks/MarkdownAPIImportTask";
+import ImportsProcessor from "./ImportsProcessor";
+
+export default class MarkdownImportsProcessor extends ImportsProcessor<IntegrationService.Markdown> {
+  protected canProcess(
+    importModel: Import<IntegrationService.Markdown>
+  ): boolean {
+    return importModel.service === IntegrationService.Markdown;
+  }
+
+  protected getInitialPhase(): ImportTaskPhase {
+    return ImportTaskPhase.Bootstrap;
+  }
+
+  protected async buildTasksInput(
+    importModel: Import<IntegrationService.Markdown>,
+    _transaction: Transaction
+  ): Promise<ImportTaskInput<IntegrationService.Markdown>> {
+    if (!importModel.scratch?.storageKey) {
+      throw new Error(
+        "Markdown import is missing scratch.storageKey for the bootstrap phase"
+      );
+    }
+
+    return [{ externalId: importModel.input[0].externalId }];
+  }
+
+  protected async scheduleTask(
+    importTask: ImportTask<IntegrationService.Markdown>
+  ): Promise<void> {
+    await new MarkdownAPIImportTask().schedule({ importTaskId: importTask.id });
+  }
+}
@@ -4,13 +4,22 @@ import { Fragment, Node } from "prosemirror-model";
 import type { WhereOptions } from "sequelize";
 import { Transaction } from "sequelize";
 import { randomUUID } from "node:crypto";
-import type { ImportTaskInput, ImportTaskOutput } from "@shared/schema";
+import type {
+  BaseImportTaskInput,
+  ImportTaskInput,
+  ImportTaskOutput,
+} from "@shared/schema";
 import type {
  ImportableIntegrationService,
  ProsemirrorData,
  ProsemirrorDoc,
 } from "@shared/types";
-import { AttachmentPreset, ImportState, ImportTaskState } from "@shared/types";
+import {
+  AttachmentPreset,
+  ImportState,
+  ImportTaskPhase,
+  ImportTaskState,
+} from "@shared/types";
 import { createContext } from "@server/context";
 import { schema } from "@server/editor";
 import Logger from "@server/logging/Logger";
@@ -134,31 +143,39 @@ export default abstract class APIImportTask<
   * @returns Promise that resolves once processing has completed.
   */
  private async onProcess(importTask: ImportTask<T>) {
-    const { taskOutput, childTasksInput } = await this.process(importTask);
+    const { taskOutput, childTasksInput } =
+      importTask.phase === ImportTaskPhase.Bootstrap
+        ? await this.processBootstrap(importTask)
+        : await this.processPage(importTask);

-    const taskOutputWithReplacements = await Promise.all(
-      taskOutput.map(async (item) => ({
-        ...item,
-        content: await this.uploadAttachments({
-          doc: item.content,
-          externalId: item.externalId,
-          createdBy: importTask.import.createdBy,
-        }),
-      }))
-    );
+    const taskOutputWithReplacements = this.shouldUploadAttachmentsPerPage()
+      ? await Promise.all(
+          taskOutput.map(async (item) => ({
+            ...item,
+            content: await this.uploadAttachments({
+              doc: item.content,
+              externalId: item.externalId,
+              createdBy: importTask.import.createdBy,
+            }),
+          }))
+        )
+      : taskOutput;

    await sequelize.transaction(async (transaction) => {
      await Promise.all(
-        chunk(childTasksInput, PagePerImportTask).map(async (input) => {
-          await ImportTask.create(
-            {
-              state: ImportTaskState.Created,
-              input,
-              importId: importTask.importId,
-            },
-            { transaction }
-          );
-        })
+        chunk(childTasksInput as BaseImportTaskInput, PagePerImportTask).map(
+          async (input) => {
+            await ImportTask.create(
+              {
+                state: ImportTaskState.Created,
+                phase: ImportTaskPhase.Page,
+                input: input as ImportTaskInput<T>,
+                importId: importTask.importId,
+              },
+              { transaction }
+            );
+          }
+        )
      );

      importTask.output = taskOutputWithReplacements;
@@ -206,10 +223,16 @@ export default abstract class APIImportTask<
      return await this.scheduleNextTask(nextImportTask);
    }

-    // All tasks for this import have been processed.
+    // All tasks for this import have been processed. Run the post-completion
+    // hook before flipping state so subclasses can perform work that must
+    // happen before "imports.processed" downstream handlers fire.
+    await this.onAllTasksCompleted(importTask);
+
    await sequelize.transaction(async (transaction) => {
      const associatedImport = importTask.import;
      associatedImport.state = ImportState.Processed;
+      // Release any cross-phase scratch state — the import is done with it.
+      associatedImport.scratch = null;
      await associatedImport.saveWithCtx(
        createContext({
          user: associatedImport.createdBy,
@@ -222,13 +245,63 @@ export default abstract class APIImportTask<
  }

  /**
-   * Process the import task.
-   * This fetches data from external source and converts it to task output.
+   * Whether the base class should create Attachment rows and upload S3 blobs
+   * per page during `onProcess`. Defaults to `true` for sources whose
+   * attachments are addressable by per-task URLs (e.g. Notion). Sources where
+   * attachments are shared across pages or live in a single archive may
+   * override this and handle attachment persistence in `onAllTasksCompleted`.
+   *
+   * @returns true to enable the per-page attachment upload step.
+   */
+  protected shouldUploadAttachmentsPerPage(): boolean {
+    return true;
+  }
+
+  /**
+   * Hook invoked after the final import task has been processed but before the
+   * associated `Import` state transitions to `Processed`. Subclasses can
+   * override to perform cross-task finalization (e.g. uploading shared
+   * attachments) that must happen before the persistence pass.
+   *
+   * @param lastImportTask The most recently completed ImportTask for the import.
+   * @returns Promise that resolves when finalization is complete.
+   */
+  protected async onAllTasksCompleted(
+    // oxlint-disable-next-line @typescript-eslint/no-unused-vars
+    lastImportTask: ImportTask<T>
+  ): Promise<void> {
+    return;
+  }
+
+  /**
+   * Bootstrap phase. Runs once per import on a worker that owns the source
+   * artifact (e.g. extracts a zip, walks the file tree, schedules child page
+   * tasks). Subclasses without a bootstrap step leave this unimplemented; the
+   * base only invokes it when an `ImportTask` is created with
+   * `phase === ImportTaskPhase.Bootstrap`.
   *
   * @param importTask ImportTask model to process.
   * @returns Promise with output that resolves once processing has completed.
   */
-  protected abstract process(
+  protected processBootstrap(
+    // oxlint-disable-next-line @typescript-eslint/no-unused-vars
+    importTask: ImportTask<T>
+  ): Promise<ProcessOutput<T>> {
+    throw new Error(
+      `${this.constructor.name} does not implement processBootstrap()`
+    );
+  }
+
+  /**
+   * Page phase. Runs for every `ImportTask` row with
+   * `phase === ImportTaskPhase.Page`, transforming a batch of source pages
+   * into ProseMirror output and optionally cascading descendants as the next
+   * wave of child tasks.
+   *
+   * @param importTask ImportTask model to process.
+   * @returns Promise with output that resolves once processing has completed.
+   */
+  protected abstract processPage(
    importTask: ImportTask<T>
  ): Promise<ProcessOutput<T>>;

@@ -1,139 +0,0 @@
-/* oxlint-disable @typescript-eslint/no-empty-function */
-import path from "node:path";
-import { FileOperation } from "@server/models";
-import { buildFileOperation } from "@server/test/factories";
-import ImportMarkdownZipTask from "./ImportMarkdownZipTask";
-
-describe("ImportMarkdownZipTask", () => {
-  it("should import the documents, attachments", async () => {
-    const fileOperation = await buildFileOperation();
-    Object.defineProperty(fileOperation, "handle", {
-      get() {
-        return {
-          path: path.resolve(
-            __dirname,
-            "..",
-            "..",
-            "test",
-            "fixtures",
-            "outline-markdown.zip"
-          ),
-          cleanup: async () => {},
-        };
-      },
-    });
-    vi.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
-
-    const props = {
-      fileOperationId: fileOperation.id,
-    };
-
-    const task = new ImportMarkdownZipTask();
-    const response = await task.perform(props);
-
-    expect(response.collections.size).toEqual(1);
-    expect(response.documents.size).toEqual(8);
-    expect(response.attachments.size).toEqual(6);
-  }, 10000);
-
-  it("should import the documents, public attachments", async () => {
-    const fileOperation = await buildFileOperation();
-    Object.defineProperty(fileOperation, "handle", {
-      get() {
-        return {
-          path: path.resolve(
-            __dirname,
-            "..",
-            "..",
-            "test",
-            "fixtures",
-            "outline-markdown-public.zip"
-          ),
-          cleanup: async () => {},
-        };
-      },
-    });
-    vi.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
-
-    const props = {
-      fileOperationId: fileOperation.id,
-    };
-
-    const task = new ImportMarkdownZipTask();
-    const response = await task.perform(props);
-
-    expect(response.collections.size).toEqual(1);
-    expect(response.documents.size).toEqual(2);
-    expect(response.attachments.size).toEqual(1);
-  }, 10000);
-
-  it("should throw an error with corrupt zip", async () => {
-    const fileOperation = await buildFileOperation();
-    Object.defineProperty(fileOperation, "handle", {
-      get() {
-        return {
-          path: path.resolve(
-            __dirname,
-            "..",
-            "..",
-            "test",
-            "fixtures",
-            "corrupt.zip"
-          ),
-          cleanup: async () => {},
-        };
-      },
-    });
-    vi.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
-
-    const props = {
-      fileOperationId: fileOperation.id,
-    };
-
-    let error;
-    try {
-      const task = new ImportMarkdownZipTask();
-      await task.perform(props);
-    } catch (err) {
-      error = err;
-    }
-
-    expect(error && error.message).toBeTruthy();
-  });
-
-  it("should throw an error with empty collection in zip", async () => {
-    const fileOperation = await buildFileOperation();
-    Object.defineProperty(fileOperation, "handle", {
-      get() {
-        return {
-          path: path.resolve(
-            __dirname,
-            "..",
-            "..",
-            "test",
-            "fixtures",
-            "empty.zip"
-          ),
-          cleanup: async () => {},
-        };
-      },
-    });
-    vi.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
-
-    const props = {
-      fileOperationId: fileOperation.id,
-    };
-
-    let error;
-    try {
-      const task = new ImportMarkdownZipTask();
-      await task.perform(props);
-    } catch (err) {
-      error = err;
-    }
-
-    expect(error && error.message).toContain(
-      "Uploaded file does not contain any valid collections"
-    );
-  });
-});
@@ -1,286 +0,0 @@
-import path from "node:path";
-import fs from "fs-extra";
-import { escapeRegExp } from "es-toolkit/compat";
-import mime from "mime-types";
-import { randomUUID } from "node:crypto";
-import documentImporter from "@server/commands/documentImporter";
-import { createContext } from "@server/context";
-import Logger from "@server/logging/Logger";
-import type { FileOperation } from "@server/models";
-import { User } from "@server/models";
-import { Buckets } from "@server/models/helpers/AttachmentHelper";
-import { sequelize } from "@server/storage/database";
-import type { FileTreeNode } from "@server/utils/ImportHelper";
-import ImportHelper from "@server/utils/ImportHelper";
-import type { StructuredImportData } from "./ImportTask";
-import ImportTask from "./ImportTask";
-
-export default class ImportMarkdownZipTask extends ImportTask {
-  public async parseData(
-    dirPath: string,
-    fileOperation: FileOperation
-  ): Promise<StructuredImportData> {
-    const tree = await ImportHelper.toFileTree(dirPath);
-    if (!tree) {
-      throw new Error("Could not find valid content in zip file");
-    }
-
-    return this.parseFileTree(fileOperation, tree.children);
-  }
-
-  /**
-   * Check if a folder contains only attachment files (no markdown documents).
-   *
-   * @param node The file tree node to check
-   * @returns true if the folder contains only non-markdown files
-   */
-  private isAttachmentFolder(node: FileTreeNode): boolean {
-    if (node.children.length === 0) {
-      return false;
-    }
-
-    if (node.title.toLowerCase() === "attachments") {
-      return true;
-    }
-
-    return node.children.every((child) => {
-      // If child has children, it's a folder - recurse to check its contents
-      if (child.children.length > 0) {
-        return this.isAttachmentFolder(child);
-      }
-
-      // Child has no children - could be a file or empty folder
-      const ext = path.extname(child.name).toLowerCase();
-
-      // If no extension, it's likely an empty folder, not a file.
-      // Be conservative and don't treat it as an attachment.
-      if (!ext) {
-        return false;
-      }
-
-      // It's a file with an extension - check if it's NOT markdown
-      return ext !== ".md" && ext !== ".markdown";
-    });
-  }
-
-  /**
-   * Recursively process all files in a folder as attachments.
-   *
-   * @param node The file tree node to process
-   * @param output The structured import data to add attachments to
-   */
-  private parseAttachmentFolder(
-    node: FileTreeNode,
-    output: StructuredImportData
-  ): void {
-    for (const child of node.children) {
-      if (child.children.length > 0) {
-        this.parseAttachmentFolder(child, output);
-      } else {
-        const id = randomUUID();
-        output.attachments.push({
-          id,
-          name: child.name,
-          path: child.path,
-          mimeType: mime.lookup(child.path) || "application/octet-stream",
-          buffer: () => fs.readFile(child.path),
-        });
-      }
-    }
-  }
-
-  /**
-   * Converts the file structure from zipAsFileTree into documents,
-   * collections, and attachments.
-   *
-   * @param fileOperation The file operation
-   * @param tree An array of FileTreeNode representing root files in the zip
-   * @returns A StructuredImportData object
-   */
-  private async parseFileTree(
-    fileOperation: FileOperation,
-    tree: FileTreeNode[]
-  ): Promise<StructuredImportData> {
-    const user = await User.findByPk(fileOperation.userId, {
-      rejectOnEmpty: true,
-    });
-    const output: StructuredImportData = {
-      collections: [],
-      documents: [],
-      attachments: [],
-    };
-
-    const docPathToIdMap = new Map<string, string>();
-
-    const parseNodeChildren = async (
-      children: FileTreeNode[],
-      collectionId: string,
-      parentDocumentId?: string
-    ): Promise<void> => {
-      for (const child of children) {
-        // special case for folders of attachments - detect by content
-        if (child.children.length > 0 && this.isAttachmentFolder(child)) {
-          this.parseAttachmentFolder(child, output);
-          continue;
-        }
-
-        const id = randomUUID();
-
-        const { title, icon, text } = await sequelize.transaction(
-          async (transaction) =>
-            documentImporter({
-              mimeType: "text/markdown",
-              fileName: child.name,
-              content:
-                child.children.length > 0
-                  ? ""
-                  : await fs.readFile(child.path, "utf8"),
-              user,
-              ctx: createContext({ user, transaction }),
-            })
-        );
-
-        const existingDocumentIndex = output.documents.findIndex(
-          (doc) =>
-            doc.title === title &&
-            doc.collectionId === collectionId &&
-            doc.parentDocumentId === parentDocumentId
-        );
-
-        const existingDocument = output.documents[existingDocumentIndex];
-
-        // When there is a file and a folder with the same name this handles
-        // the case by combining the two into one document with nested children
-        if (existingDocument) {
-          docPathToIdMap.set(child.path, existingDocument.id);
-
-          if (existingDocument.text === "") {
-            output.documents[existingDocumentIndex].text = text;
-          }
-
-          await parseNodeChildren(
-            child.children,
-            collectionId,
-            existingDocument.id
-          );
-        } else {
-          docPathToIdMap.set(child.path, id);
-
-          output.documents.push({
-            id,
-            title,
-            icon,
-            text,
-            collectionId,
-            parentDocumentId,
-            path: child.path,
-            mimeType: "text/markdown",
-          });
-
-          await parseNodeChildren(child.children, collectionId, id);
-        }
-      }
-    };
-
-    // All nodes in the root level should be collections
-    for (const node of tree) {
-      if (node.children.length > 0) {
-        // Check if this is an attachments-only folder at root level
-        if (this.isAttachmentFolder(node)) {
-          this.parseAttachmentFolder(node, output);
-          continue;
-        }
-
-        const collectionId = randomUUID();
-        output.collections.push({
-          id: collectionId,
-          name: node.title,
-        });
-        await parseNodeChildren(node.children, collectionId);
-      } else {
-        Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
-          fileOperationId: fileOperation.id,
-        });
-      }
-    }
-
-    for (const document of output.documents) {
-      // Check all of the attachments we've created against urls in the text
-      // and replace them out with attachment redirect urls before continuing.
-      for (const attachment of output.attachments) {
-        const encodedPath = encodeURI(attachment.path);
-        const attachmentFileName = path.basename(attachment.path);
-        const reference = `<<${attachment.id}>>`;
-
-        // Pull the collection and subdirectory out of the path name, upload
-        // folders in an export are relative to the document itself.
-        // Support both legacy bucket names (uploads/public) and generic attachment folders.
-        let normalizedAttachmentPath = encodedPath
-          .replace(
-            new RegExp(`(.*)/${Buckets.uploads}/`),
-            `${Buckets.uploads}/`
-          )
-          .replace(new RegExp(`(.*)/${Buckets.public}/`), `${Buckets.public}/`);
-
-        // Also try normalizing to just the folder containing the attachment
-        // This handles arbitrary folder names like "attachments/"
-        const attachmentDir = path.basename(path.dirname(attachment.path));
-        const genericNormalizedPath = `${attachmentDir}/${encodeURI(attachmentFileName)}`;
-
-        document.text = document.text
-          .replace(new RegExp(escapeRegExp(encodedPath), "g"), reference)
-          .replace(
-            new RegExp(`\\.?/?${escapeRegExp(normalizedAttachmentPath)}`, "g"),
-            reference
-          )
-          .replace(
-            new RegExp(`\\.?/?${escapeRegExp(genericNormalizedPath)}`, "g"),
-            reference
-          );
-
-        // Handle markdown links that reference attachments via a path rooted
-        // at an "attachments" folder, optionally prefixed with "./", e.g.
-        // ./attachments/foo.png or ./attachments/sub/foo.png.
-        const segments = attachment.path.split(path.sep);
-        const attachmentsIdx = segments.findIndex(
-          (seg) => seg.toLowerCase() === "attachments"
-        );
-        if (attachmentsIdx >= 0) {
-          const relFromAttachments = segments.slice(attachmentsIdx).join("/");
-          document.text = document.text.replace(
-            new RegExp(
-              `\\.?/?${escapeRegExp(encodeURI(relFromAttachments))}`,
-              "g"
-            ),
-            reference
-          );
-        }
-      }
-
-      const basePath = path.dirname(document.path);
-
-      // check internal document links in the text and replace them with placeholders.
-      // When persisting, the placeholders will be replaced with the right urls.
-      const internalLinks = [
-        ...document.text.matchAll(/\[[^\]]+\]\(([^)]+\.md)\)/g),
-      ];
-
-      internalLinks.forEach((match) => {
-        const referredDocPath = match[1];
-        const normalizedDocPath = decodeURI(
-          path.normalize(`${basePath}/${referredDocPath}`)
-        );
-
-        const referredDocId = docPathToIdMap.get(normalizedDocPath);
-        if (referredDocId) {
-          document.text = document.text.replace(
-            referredDocPath,
-            `<<${referredDocId}>>`
-          );
-        }
-      });
-    }
-
-    return output;
-  }
-}
@@ -0,0 +1,116 @@
+import {
+  rewriteAttachmentPaths,
+  rewriteInternalLinks,
+} from "./MarkdownAPIImportTask";
+
+describe("rewriteAttachmentPaths", () => {
+  it("replaces a direct encoded path with the placeholder", () => {
+    const out = rewriteAttachmentPaths(
+      "![alt](My%20Collection/attachments/foo.png)",
+      [{ id: "att-1", pathInZip: "My Collection/attachments/foo.png" }]
+    );
+    expect(out).toBe("![alt](<<att-1>>)");
+  });
+
+  it("normalizes legacy `uploads/` bucket layout", () => {
+    const out = rewriteAttachmentPaths("![x](./uploads/abc/file.png)", [
+      {
+        id: "att-2",
+        pathInZip: "Some Collection/uploads/abc/file.png",
+      },
+    ]);
+    expect(out).toBe("![x](<<att-2>>)");
+  });
+
+  it("normalizes legacy `public/` bucket layout", () => {
+    const out = rewriteAttachmentPaths("![x](./public/abc/file.png)", [
+      {
+        id: "att-3",
+        pathInZip: "Some Collection/public/abc/file.png",
+      },
+    ]);
+    expect(out).toBe("![x](<<att-3>>)");
+  });
+
+  it("handles arbitrary folder names like 'attachments/'", () => {
+    const out = rewriteAttachmentPaths("![x](./attachments/foo.png)", [
+      { id: "att-4", pathInZip: "Collection/attachments/foo.png" },
+    ]);
+    expect(out).toBe("![x](<<att-4>>)");
+  });
+
+  it("matches nested attachments folders", () => {
+    const out = rewriteAttachmentPaths("![x](./attachments/sub/bar.png)", [
+      {
+        id: "att-5",
+        pathInZip: "Collection/Doc/attachments/sub/bar.png",
+      },
+    ]);
+    expect(out).toBe("![x](<<att-5>>)");
+  });
+
+  it("substitutes multiple references in the same document", () => {
+    const out = rewriteAttachmentPaths(
+      "![a](./attachments/a.png) and ![b](./attachments/b.png)",
+      [
+        { id: "id-a", pathInZip: "C/attachments/a.png" },
+        { id: "id-b", pathInZip: "C/attachments/b.png" },
+      ]
+    );
+    expect(out).toBe("![a](<<id-a>>) and ![b](<<id-b>>)");
+  });
+
+  it("is a no-op when no attachments match", () => {
+    const out = rewriteAttachmentPaths("![x](https://example.com/a.png)", [
+      { id: "id-a", pathInZip: "C/attachments/a.png" },
+    ]);
+    expect(out).toBe("![x](https://example.com/a.png)");
+  });
+});
+
+describe("rewriteInternalLinks", () => {
+  it("rewrites a sibling .md link to a placeholder", () => {
+    const out = rewriteInternalLinks(
+      "see [other](./other.md)",
+      "Collection/parent.md",
+      { "Collection/other.md": "doc-1" }
+    );
+    expect(out).toBe("see [other](<<doc-1>>)");
+  });
+
+  it("rewrites a nested .md link", () => {
+    const out = rewriteInternalLinks(
+      "see [child](./sub/child.md)",
+      "Collection/parent.md",
+      { "Collection/sub/child.md": "doc-2" }
+    );
+    expect(out).toBe("see [child](<<doc-2>>)");
+  });
+
+  it("leaves unresolved .md links untouched", () => {
+    const out = rewriteInternalLinks(
+      "see [missing](./missing.md)",
+      "Collection/parent.md",
+      {}
+    );
+    expect(out).toBe("see [missing](./missing.md)");
+  });
+
+  it("ignores non-md links", () => {
+    const out = rewriteInternalLinks(
+      "see [site](https://example.com)",
+      "Collection/parent.md",
+      { "Collection/parent.md": "doc-self" }
+    );
+    expect(out).toBe("see [site](https://example.com)");
+  });
+
+  it("decodes encoded path segments before lookup", () => {
+    const out = rewriteInternalLinks(
+      "see [other](./My%20Doc.md)",
+      "Collection/parent.md",
+      { "Collection/My Doc.md": "doc-3" }
+    );
+    expect(out).toBe("see [other](<<doc-3>>)");
+  });
+});
@@ -0,0 +1,672 @@
+import path from "node:path";
+import { randomUUID } from "node:crypto";
+import { escapeRegExp } from "es-toolkit/compat";
+import fs from "fs-extra";
+import mime from "mime-types";
+import { UniqueConstraintError } from "sequelize";
+import tmp from "tmp";
+import type {
+  ImportTaskInput,
+  ImportTaskOutput,
+  MarkdownAttachmentManifestItem,
+  MarkdownPageImportTaskInputItem,
+} from "@shared/schema";
+import type { IntegrationService, ProsemirrorDoc } from "@shared/types";
+import { AttachmentPreset } from "@shared/types";
+import attachmentCreator from "@server/commands/attachmentCreator";
+import { createContext } from "@server/context";
+import env from "@server/env";
+import Logger from "@server/logging/Logger";
+import type { ImportTask } from "@server/models";
+import { Attachment } from "@server/models";
+import { Buckets } from "@server/models/helpers/AttachmentHelper";
+import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
+import { sequelize } from "@server/storage/database";
+import FileStorage from "@server/storage/files";
+import type { FileTreeNode } from "@server/utils/ImportHelper";
+import ImportHelper from "@server/utils/ImportHelper";
+import ZipHelper from "@server/utils/ZipHelper";
+import type { ProcessOutput } from "./APIImportTask";
+import APIImportTask from "./APIImportTask";
+import { DocumentConverter } from "@server/utils/DocumentConverter";
+
+type Markdown = IntegrationService.Markdown;
+
+interface ExtractedZip {
+  dirPath: string;
+  cleanup: () => Promise<void>;
+}
+
+interface DiscoveredDocument {
+  id: string;
+  title: string;
+  pathInZip: string;
+  collectionId: string;
+  parentDocumentId?: string;
+  markdownText: string;
+  children: DiscoveredDocument[];
+}
+
+interface DiscoveredCollection {
+  id: string;
+  title: string;
+  children: DiscoveredDocument[];
+}
+
+interface AttachmentRef {
+  id: string;
+  pathInZip: string;
+}
+
+/**
+ * Rewrites local attachment paths in markdown text into `<<attachmentId>>`
+ * placeholders. Supports legacy bucket layouts (`uploads/`, `public/`),
+ * arbitrary nested folder names, and `./attachments/...` rooted paths. Both
+ * encoded and unencoded path forms are matched.
+ *
+ * Exported for tests; not part of the module's public surface.
+ *
+ * @param markdown The raw markdown text from a single document.
+ * @param attachments Attachment manifest entries to substitute.
+ * @returns Markdown text with local paths replaced by `<<id>>` references.
+ */
+export function rewriteAttachmentPaths(
+  markdown: string,
+  attachments: AttachmentRef[]
+): string {
+  let text = markdown;
+
+  for (const attachment of attachments) {
+    const encodedPath = encodeURI(attachment.pathInZip);
+    const attachmentFileName = path.basename(attachment.pathInZip);
+    const reference = `<<${attachment.id}>>`;
+
+    const normalizedAttachmentPath = encodedPath
+      .replace(new RegExp(`(.*)/${Buckets.uploads}/`), `${Buckets.uploads}/`)
+      .replace(new RegExp(`(.*)/${Buckets.public}/`), `${Buckets.public}/`);
+
+    const attachmentDir = path.basename(path.dirname(attachment.pathInZip));
+    const genericNormalizedPath = `${attachmentDir}/${encodeURI(attachmentFileName)}`;
+
+    text = text
+      .replace(new RegExp(escapeRegExp(encodedPath), "g"), reference)
+      .replace(
+        new RegExp(`\\.?/?${escapeRegExp(normalizedAttachmentPath)}`, "g"),
+        reference
+      );
+
+    const segments = attachment.pathInZip.split(path.sep);
+    const attachmentsIdx = segments.findIndex(
+      (seg) => seg.toLowerCase() === "attachments"
+    );
+    if (attachmentsIdx >= 0) {
+      const relFromAttachments = segments.slice(attachmentsIdx).join("/");
+      text = text.replace(
+        new RegExp(`\\.?/?${escapeRegExp(encodeURI(relFromAttachments))}`, "g"),
+        reference
+      );
+    }
+
+    text = text.replace(
+      new RegExp(`\\.?/?${escapeRegExp(genericNormalizedPath)}`, "g"),
+      reference
+    );
+  }
+
+  return text;
+}
+
+/**
+ * Rewrites internal markdown links (`[label](./relative.md)`) into
+ * `<<documentId>>` placeholders, resolved against a path → id map built from
+ * the zip's full document tree.
+ *
+ * Exported for tests; not part of the module's public surface.
+ *
+ * @param markdown The raw markdown text from a single document.
+ * @param documentPath Zip-relative path of the document being rewritten
+ *                     (e.g. `Collection/parent.md`); used as the base for
+ *                     resolving relative link targets against docMap keys.
+ * @param docMap Map of document path (as it appeared in the zip) to its
+ *               pre-assigned externalId.
+ * @returns Markdown text with internal `.md` link targets replaced by
+ *          `<<id>>` references.
+ */
+export function rewriteInternalLinks(
+  markdown: string,
+  documentPath: string,
+  docMap: Record<string, string>
+): string {
+  const basePath = path.dirname(documentPath);
+  const internalLinks = [...markdown.matchAll(/\[[^\]]+\]\(([^)]+\.md)\)/g)];
+
+  let text = markdown;
+  for (const match of internalLinks) {
+    const referredDocPath = match[1];
+    const normalizedDocPath = decodeURI(
+      path.normalize(`${basePath}/${referredDocPath}`)
+    );
+
+    const referredDocId = docMap[normalizedDocPath];
+    if (referredDocId) {
+      text = text.replace(referredDocPath, `<<${referredDocId}>>`);
+    }
+  }
+
+  return text;
+}
+
+export default class MarkdownAPIImportTask extends APIImportTask<Markdown> {
+  protected shouldUploadAttachmentsPerPage(): boolean {
+    return false;
+  }
+
+  protected async scheduleNextTask(importTask: ImportTask<Markdown>) {
+    await new MarkdownAPIImportTask().schedule({ importTaskId: importTask.id });
+  }
+
+  protected async onAllTasksCompleted(
+    lastImportTask: ImportTask<Markdown>
+  ): Promise<void> {
+    const scratch = lastImportTask.import.scratch;
+    if (!scratch?.storageKey || !scratch.manifest?.length) {
+      return;
+    }
+
+    const { dirPath, cleanup } = await this.downloadAndExtract(
+      scratch.storageKey
+    );
+
+    try {
+      const createdBy = lastImportTask.import.createdBy;
+
+      for (const item of scratch.manifest) {
+        const filePath = path.join(dirPath, item.pathInZip);
+        let buffer: Buffer;
+        try {
+          buffer = await fs.readFile(filePath);
+        } catch (err) {
+          Logger.warn(
+            `Markdown import attachment missing in zip, skipping: ${item.pathInZip}`,
+            err instanceof Error ? err : undefined
+          );
+          continue;
+        }
+
+        try {
+          await sequelize.transaction(async (transaction) =>
+            attachmentCreator({
+              source: "import",
+              preset: AttachmentPreset.DocumentAttachment,
+              id: item.id,
+              name: item.name,
+              type: item.mimeType,
+              buffer,
+              user: createdBy,
+              ctx: createContext({ user: createdBy, transaction }),
+              fetchOptions: {
+                timeout: env.FILE_STORAGE_IMPORT_TIMEOUT,
+              },
+            })
+          );
+        } catch (err) {
+          // Each attachment commits in its own transaction, so a retry of
+          // this hook can re-encounter ids that already landed. Treat the
+          // unique-id collision as a no-op so the import remains resumable.
+          if (err instanceof UniqueConstraintError) {
+            continue;
+          }
+          throw err;
+        }
+      }
+    } finally {
+      await cleanup();
+    }
+  }
+
+  protected async processBootstrap(
+    importTask: ImportTask<Markdown>
+  ): Promise<ProcessOutput<Markdown>> {
+    const storageKey = importTask.import.scratch?.storageKey;
+    if (!storageKey) {
+      throw new Error("Markdown import is missing scratch.storageKey");
+    }
+
+    const { dirPath, cleanup } = await this.downloadAndExtract(storageKey);
+
+    try {
+      const tree = await ImportHelper.toFileTree(dirPath);
+      if (!tree) {
+        throw new Error("Could not find valid content in zip file");
+      }
+
+      const collections: DiscoveredCollection[] = [];
+      const manifest: MarkdownAttachmentManifestItem[] = [];
+
+      for (const node of tree.children) {
+        if (node.children.length === 0) {
+          Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
+            importTaskId: importTask.id,
+          });
+          continue;
+        }
+
+        if (this.isAttachmentFolder(node)) {
+          this.collectAttachments(node, manifest, dirPath);
+          continue;
+        }
+
+        const collection: DiscoveredCollection = {
+          id: randomUUID(),
+          title: node.title,
+          children: [],
+        };
+        collections.push(collection);
+
+        await this.collectDocumentsAndAttachments({
+          children: node.children,
+          collectionId: collection.id,
+          out: collection.children,
+          manifest,
+          extractionRoot: dirPath,
+        });
+      }
+
+      // Build docMap (pathInZip -> externalId) for internal-link resolution.
+      // Walk the full document tree to collect every doc id, since internal
+      // markdown links can target any document regardless of depth.
+      const docMap: Record<string, string> = {};
+      const collectDocMap = (docs: DiscoveredDocument[]) => {
+        for (const d of docs) {
+          docMap[d.pathInZip] = d.id;
+          collectDocMap(d.children);
+        }
+      };
+      for (const c of collections) {
+        collectDocMap(c.children);
+      }
+
+      // Replace (not append) anything past the create-time placeholder with
+      // the freshly discovered collections so a retried bootstrap doesn't
+      // accumulate duplicate entries with fresh UUIDs from a previous
+      // partial run. ImportsProcessor's persistence pass treats these as
+      // collections.
+      const associatedImport = importTask.import;
+      const placeholder = associatedImport.input[0];
+      associatedImport.input = [
+        placeholder,
+        ...collections.map((c) => ({
+          externalId: c.id,
+          permission: placeholder.permission,
+        })),
+      ];
+      associatedImport.scratch = { storageKey, manifest };
+      await associatedImport.save();
+
+      // Append collection placeholder items so ImportsProcessor iterates
+      // them during the bootstrap row (the earliest createdAt) — that
+      // guarantees collections land in the DB before any per-page document
+      // references them.
+      const collectionInputItems: MarkdownPageImportTaskInputItem[] =
+        collections.map((c) => ({
+          externalId: c.id,
+          title: c.title,
+          path: c.title,
+          markdownText: "",
+          attachmentMap: [],
+          docMap: {},
+        }));
+
+      importTask.input = [importTask.input[0], ...collectionInputItems];
+
+      const collectionOutputs: ImportTaskOutput = collections.map((c) => ({
+        externalId: c.id,
+        title: c.title,
+        content: ProsemirrorHelper.getEmptyDocument() as ProsemirrorDoc,
+      }));
+
+      // First wave of document tasks: only top-level docs in each collection.
+      // Each carries its descendants in `children` and the per-page handler
+      // re-emits them as the next wave of childTasksInput, producing a strict
+      // depth-ordered cascade of ImportTask rows so parent FKs are always
+      // satisfied at child-doc creation time.
+      const childTasksInput: ImportTaskInput<Markdown> = collections.flatMap(
+        (c) => c.children.map((d) => this.toPageInput(d, manifest, docMap))
+      );
+
+      return { taskOutput: collectionOutputs, childTasksInput };
+    } finally {
+      await cleanup();
+    }
+  }
+
+  /**
+   * Converts a discovered document subtree into a per-page task input,
+   * recursively packing the doc's descendants into the `children` field so
+   * each tree-depth runs as its own task wave.
+   *
+   * @param doc The discovered document, including its descendants.
+   * @param manifest The full attachment manifest (used for per-page refs).
+   * @param docMap Path → externalId map for internal link rewriting.
+   * @returns A self-contained per-page task input.
+   */
+  private toPageInput(
+    doc: DiscoveredDocument,
+    manifest: MarkdownAttachmentManifestItem[],
+    docMap: Record<string, string>
+  ): MarkdownPageImportTaskInputItem {
+    return {
+      externalId: doc.id,
+      parentExternalId: doc.parentDocumentId,
+      collectionExternalId: doc.collectionId,
+      title: doc.title,
+      path: doc.pathInZip,
+      markdownText: doc.markdownText,
+      attachmentMap: this.attachmentsReferencedBy(doc.markdownText, manifest),
+      docMap,
+      children: doc.children.length
+        ? doc.children.map((c) => this.toPageInput(c, manifest, docMap))
+        : undefined,
+    };
+  }
+
+  protected async processPage(
+    importTask: ImportTask<Markdown>
+  ): Promise<ProcessOutput<Markdown>> {
+    const taskOutput: ImportTaskOutput = [];
+    const childTasksInput: MarkdownPageImportTaskInputItem[] = [];
+
+    const items = importTask.input as MarkdownPageImportTaskInputItem[];
+    for (const item of items) {
+      // Empty markdown short-circuits — used by collection placeholders so
+      // ImportsProcessor sees their externalId paired with empty content and
+      // builds a Collection rather than a Document. (Currently collections
+      // are persisted via the bootstrap task itself, so this branch is only
+      // a defensive fallback.)
+      if (!item.markdownText) {
+        taskOutput.push({
+          externalId: item.externalId,
+          title: item.title,
+          content: ProsemirrorHelper.getEmptyDocument() as ProsemirrorDoc,
+        });
+      } else {
+        const transformedMarkdown = this.rewriteMarkdown(item);
+        const { doc, title, icon } = await DocumentConverter.convert(
+          transformedMarkdown,
+          path.basename(item.path),
+          "text/markdown"
+        );
+
+        taskOutput.push({
+          externalId: item.externalId,
+          title: title || item.title,
+          icon,
+          content: doc.toJSON() as ProsemirrorDoc,
+        });
+      }
+
+      // Cascade this doc's direct descendants as the next task wave. Their
+      // ImportTask rows will be created after the current one returns, so
+      // their createdAt is strictly later — guaranteeing parent-before-child
+      // FK ordering during ImportsProcessor's persistence pass.
+      if (item.children?.length) {
+        childTasksInput.push(...item.children);
+      }
+    }
+
+    return { taskOutput, childTasksInput };
+  }
+
+  /**
+   * Pre-rewrites a page's markdown text. Internal `.md` links become mention
+   * markdown so the editor parses them as Document mentions. Attachment paths
+   * are first reduced to `<<id>>` placeholders by the shared rewriter, then
+   * — distinct from the prosemirror-tree walk we used to do — substituted
+   * with their final attachment redirect URLs in the markdown text. Doing
+   * the resolution at the text layer avoids markdown-it parsing `<<id>>` as
+   * an angle-bracket-wrapped URL (which produced broken image src attrs).
+   *
+   * @param page The per-page task input.
+   * @returns Rewritten markdown text ready for DocumentConverter.
+   */
+  private rewriteMarkdown(page: MarkdownPageImportTaskInputItem): string {
+    let text = rewriteInternalLinks(page.markdownText, page.path, page.docMap);
+
+    // Convert `[label](<<id>>)` links from rewriteInternalLinks into mention
+    // markdown the editor recognises: `@[label](mention://<uuid>/document/<id>)`.
+    text = text.replace(
+      /\[([^\]]+)\]\(<<([^>]+)>>\)/g,
+      (_full, label: string, externalId: string) =>
+        `@[${label}](mention://${randomUUID()}/document/${externalId})`
+    );
+
+    text = rewriteAttachmentPaths(
+      text,
+      page.attachmentMap.map((m) => ({ id: m.id, pathInZip: m.pathInZip }))
+    );
+
+    // Resolve remaining `<<id>>` placeholders to attachment redirect URLs.
+    text = text.replace(/<<([^>]+)>>/g, (_full, id: string) =>
+      Attachment.getRedirectUrl(id)
+    );
+
+    return text;
+  }
+
+  /**
+   * Returns the subset of the full manifest that is referenced anywhere in
+   * the given markdown text. Used to bound the per-page task input size.
+   *
+   * @param markdown Raw markdown text for a single document.
+   * @param manifest The full attachment manifest from the bootstrap phase.
+   * @returns Manifest entries that appear (by filename) in the markdown.
+   */
+  private attachmentsReferencedBy(
+    markdown: string,
+    manifest: MarkdownAttachmentManifestItem[]
+  ): MarkdownAttachmentManifestItem[] {
+    return manifest.filter((item) => {
+      const fileName = path.basename(item.pathInZip);
+      return (
+        markdown.includes(fileName) || markdown.includes(encodeURI(fileName))
+      );
+    });
+  }
+
+  /**
+   * Detects folders containing only attachments (no markdown documents).
+   * Recursively considers nested folders; mirrors the legacy heuristic.
+   *
+   * @param node FileTreeNode to inspect.
+   * @returns true when the folder appears to hold only attachments.
+   */
+  private isAttachmentFolder(node: FileTreeNode): boolean {
+    if (node.children.length === 0) {
+      return false;
+    }
+    if (node.title.toLowerCase() === "attachments") {
+      return true;
+    }
+    return node.children.every((child) => {
+      if (child.children.length > 0) {
+        return this.isAttachmentFolder(child);
+      }
+      const ext = path.extname(child.name).toLowerCase();
+      if (!ext) {
+        return false;
+      }
+      return ext !== ".md" && ext !== ".markdown";
+    });
+  }
+
+  /**
+   * Recursively collects all files under an attachment-only folder into the
+   * manifest. `pathInZip` is stored as a path relative to the extraction
+   * root so it can be resolved again after the zip is re-extracted during
+   * the completion phase (which lands in a fresh tmp dir).
+   *
+   * @param node Attachment-folder FileTreeNode.
+   * @param manifest Manifest array to push entries into.
+   * @param extractionRoot Absolute path to the zip extraction root.
+   */
+  private collectAttachments(
+    node: FileTreeNode,
+    manifest: MarkdownAttachmentManifestItem[],
+    extractionRoot: string
+  ): void {
+    for (const child of node.children) {
+      if (child.children.length > 0) {
+        this.collectAttachments(child, manifest, extractionRoot);
+        continue;
+      }
+      manifest.push({
+        id: randomUUID(),
+        name: child.name,
+        pathInZip: path.relative(extractionRoot, child.path),
+        mimeType: mime.lookup(child.path) || "application/octet-stream",
+      });
+    }
+  }
+
+  /**
+   * Walks a collection subtree and gathers documents (markdown files) and
+   * loose attachments. Documents are appended to `out` as a tree — each
+   * entry's `children` holds its direct descendants. This is the shape the
+   * per-page task cascade consumes.
+   *
+   * @param children FileTreeNode children of the current folder.
+   * @param collectionId Pre-assigned id of the enclosing collection.
+   * @param parentDocumentId Optional parent document id when nested.
+   * @param out Sibling accumulator to push discovered documents into.
+   * @param manifest Attachment manifest accumulator.
+   * @returns Promise that resolves when the subtree has been processed.
+   */
+  private async collectDocumentsAndAttachments({
+    children,
+    collectionId,
+    parentDocumentId,
+    out,
+    manifest,
+    extractionRoot,
+  }: {
+    children: FileTreeNode[];
+    collectionId: string;
+    parentDocumentId?: string;
+    out: DiscoveredDocument[];
+    manifest: MarkdownAttachmentManifestItem[];
+    extractionRoot: string;
+  }): Promise<void> {
+    for (const child of children) {
+      if (child.children.length > 0 && this.isAttachmentFolder(child)) {
+        this.collectAttachments(child, manifest, extractionRoot);
+        continue;
+      }
+
+      const ext = path.extname(child.name).toLowerCase();
+      const isMarkdown = ext === ".md" || ext === ".markdown";
+      const isFolder = child.children.length > 0;
+
+      if (!isMarkdown && !isFolder) {
+        manifest.push({
+          id: randomUUID(),
+          name: child.name,
+          pathInZip: path.relative(extractionRoot, child.path),
+          mimeType: mime.lookup(child.path) || "application/octet-stream",
+        });
+        continue;
+      }
+
+      const id = randomUUID();
+      const markdownText = isFolder
+        ? ""
+        : await fs.readFile(child.path, "utf8");
+
+      // Folder-and-file with the same title (a "name.md" alongside a "name/"
+      // directory) is merged onto a single document: the folder body picks up
+      // the file's markdown text, and the folder's contents become children.
+      const sibling = out.find((d) => d.title === child.title);
+
+      if (sibling) {
+        if (sibling.markdownText === "" && markdownText) {
+          sibling.markdownText = markdownText;
+        }
+        if (isFolder) {
+          await this.collectDocumentsAndAttachments({
+            children: child.children,
+            collectionId,
+            parentDocumentId: sibling.id,
+            out: sibling.children,
+            manifest,
+            extractionRoot,
+          });
+        }
+        continue;
+      }
+
+      const node: DiscoveredDocument = {
+        id,
+        title: child.title,
+        pathInZip: path.relative(extractionRoot, child.path),
+        collectionId,
+        parentDocumentId,
+        markdownText,
+        children: [],
+      };
+      out.push(node);
+
+      if (isFolder) {
+        await this.collectDocumentsAndAttachments({
+          children: child.children,
+          collectionId,
+          parentDocumentId: id,
+          out: node.children,
+          manifest,
+          extractionRoot,
+        });
+      }
+    }
+  }
+
+  /**
+   * Downloads the zip from object storage and extracts it into a temporary
+   * directory.
+   *
+   * @param storageKey Storage key for the uploaded zip.
+   * @returns The temp dir path and a cleanup callback. Caller must invoke
+   *          cleanup() once finished.
+   */
+  private async downloadAndExtract(storageKey: string): Promise<ExtractedZip> {
+    const handle = await FileStorage.getFileHandle(storageKey);
+
+    let dirPath: string | undefined;
+    try {
+      dirPath = await new Promise<string>((resolve, reject) => {
+        tmp.dir({ unsafeCleanup: true }, (err, tmpDir) => {
+          if (err) {
+            reject(err);
+            return;
+          }
+          resolve(tmpDir);
+        });
+      });
+
+      await ZipHelper.extract(handle.path, dirPath);
+
+      return {
+        dirPath,
+        cleanup: async () => {
+          await fs
+            .rm(dirPath!, { recursive: true, force: true })
+            .catch(() => {});
+          await handle.cleanup().catch(() => {});
+        },
+      };
+    } catch (err) {
+      if (dirPath) {
+        await fs.rm(dirPath, { recursive: true, force: true }).catch(() => {});
+      }
+      await handle.cleanup().catch(() => {});
+      throw err;
+    }
+  }
+}
@@ -76,9 +76,11 @@ export const CollectionsImportSchema = BaseSchema.extend({
      .nullish()
      .transform((val) => (isUndefined(val) ? null : val)),
    attachmentId: z.uuid(),
+    // Markdown zip imports now run through `imports.create` →
+    // MarkdownAPIImportTask, so only JSON is accepted here.
    format: z
-      .enum(FileOperationFormat)
-      .prefault(FileOperationFormat.MarkdownZip),
+      .literal(FileOperationFormat.JSON)
+      .prefault(FileOperationFormat.JSON),
  }),
 });

@@ -1,15 +1,16 @@
 import Router from "koa-router";
+import { randomUUID } from "node:crypto";
 import { truncate } from "es-toolkit/compat";
 import type { WhereOptions } from "sequelize";
 import type { IntegrationType } from "@shared/types";
-import { ImportState, UserRole } from "@shared/types";
+import { ImportState, IntegrationService, UserRole } from "@shared/types";
 import { ImportValidation } from "@shared/validations";
 import { UnprocessableEntityError } from "@server/errors";
 import auth from "@server/middlewares/authentication";
 import { rateLimiter } from "@server/middlewares/rateLimiter";
 import { transaction } from "@server/middlewares/transaction";
 import validate from "@server/middlewares/validate";
-import { Integration } from "@server/models";
+import { Attachment, Integration } from "@server/models";
 import Import from "@server/models/Import";
 import { authorize } from "@server/policies";
 import { presentImport, presentPolicies } from "@server/presenters";
@@ -27,7 +28,7 @@ router.post(
  validate(T.ImportsCreateSchema),
  transaction(),
  async (ctx: APIContext<T.ImportsCreateReq>) => {
-    const { integrationId, service, input } = ctx.input.body;
+    const body = ctx.input.body;
    const { user } = ctx.state.auth;

    authorize(user, "createImport", user.team);
@@ -47,9 +48,41 @@ router.post(
      throw UnprocessableEntityError("An import is already in progress");
    }

+    if (body.service === IntegrationService.Markdown) {
+      const attachment = await Attachment.findByPk(body.attachmentId, {
+        rejectOnEmpty: true,
+      });
+      authorize(user, "read", attachment);
+
+      const importModel = await Import.createWithCtx(ctx, {
+        name: truncate(attachment.name, {
+          length: ImportValidation.maxNameLength,
+        }),
+        service: IntegrationService.Markdown,
+        state: ImportState.Created,
+        input: [
+          {
+            externalId: randomUUID(),
+            permission: body.permission,
+          },
+        ],
+        scratch: { storageKey: attachment.key },
+        integrationId: null,
+        createdById: user.id,
+        teamId: user.teamId,
+      });
+      importModel.createdBy = user;
+
+      ctx.body = {
+        data: presentImport(importModel),
+        policies: presentPolicies(user, [importModel]),
+      };
+      return;
+    }
+
    const integration = await Integration.findByPk<
      Integration<IntegrationType.Import>
-    >(integrationId, {
+    >(body.integrationId, {
      rejectOnEmpty: true,
    });
    authorize(user, "read", integration);
@@ -58,10 +91,10 @@ router.post(

    const importModel = await Import.createWithCtx(ctx, {
      name: truncate(name, { length: ImportValidation.maxNameLength }),
-      service,
+      service: body.service,
      state: ImportState.Created,
-      input,
-      integrationId,
+      input: body.input,
+      integrationId: body.integrationId,
      createdById: user.id,
      teamId: user.teamId,
    });
@@ -1,6 +1,7 @@
 import { z } from "zod";
 import { NotionImportInputItemSchema } from "@shared/schema";
 import {
+  CollectionPermission,
  ImportableIntegrationService,
  IntegrationService,
 } from "@shared/types";
@@ -37,6 +38,11 @@ export const ImportsCreateSchema = BaseSchema.extend({
      service: z.literal(IntegrationService.Notion),
      input: z.array(NotionImportInputItemSchema),
    }),
+    z.object({
+      service: z.literal(IntegrationService.Markdown),
+      attachmentId: z.uuid(),
+      permission: z.enum(CollectionPermission).optional(),
+    }),
  ]),
 });

@@ -20,8 +20,20 @@ export const NotionImportInputItemSchema = BaseImportInputItemSchema.extend({

 export type NotionImportInput = z.infer<typeof NotionImportInputItemSchema>[];

+export const MarkdownImportInputItemSchema = BaseImportInputItemSchema.extend({
+  externalId: z.string(),
+});
+
+export type MarkdownImportInput = z.infer<
+  typeof MarkdownImportInputItemSchema
+>[];
+
 export type ImportInput<T extends ImportableIntegrationService> =
-  T extends IntegrationService.Notion ? NotionImportInput : BaseImportInput;
+  T extends IntegrationService.Notion
+    ? NotionImportInput
+    : T extends IntegrationService.Markdown
+      ? MarkdownImportInput
+      : BaseImportInput;

 export const BaseImportTaskInputItemSchema = z.object({
  externalId: z.string(),
@@ -42,16 +54,88 @@ export type NotionImportTaskInput = z.infer<
  typeof NotionImportTaskInputItemSchema
 >[];

+/**
+ * Manifest entry describing a single attachment discovered during the
+ * Markdown zip bootstrap phase. The `id` is a pre-assigned UUID used both
+ * as the attachment node id in per-page prosemirror output and as the
+ * Attachment row id created during completion.
+ */
+export const MarkdownAttachmentManifestItemSchema = z.object({
+  id: z.uuid(),
+  name: z.string(),
+  mimeType: z.string(),
+  pathInZip: z.string(),
+});
+
+export type MarkdownAttachmentManifestItem = z.infer<
+  typeof MarkdownAttachmentManifestItemSchema
+>;
+
+/**
+ * Markdown importer scratch state. `storageKey` is set at import creation
+ * (it's the only durable handle on the uploaded zip). `manifest` is added
+ * by the bootstrap phase so the completion phase can re-download the zip
+ * and create Attachment rows without re-walking the tree.
+ */
+export interface MarkdownImportScratch {
+  storageKey: string;
+  manifest?: MarkdownAttachmentManifestItem[];
+}
+
+/**
+ * Per-importer scratch shape stored on `Import.scratch`. Holds cross-phase
+ * state that the importer needs between bootstrap and completion but that
+ * isn't part of any single task's input. Cleared when the import flips to
+ * `Processed`.
+ */
+export type ImportScratch<T extends ImportableIntegrationService> =
+  T extends IntegrationService.Markdown ? MarkdownImportScratch : never;
+
+/**
+ * Per-page task input. Generated by the bootstrap task and consumed by
+ * subsequent MarkdownAPIImportTask runs. `children` carries this document's
+ * direct descendants so that each level of the document tree is scheduled
+ * as a separate task wave; this preserves parent-before-child ordering
+ * during persistence (createdAt of child tasks is strictly later than
+ * parents'). The type is defined as a TypeScript interface rather than via
+ * z.infer because it is only consumed internally — never validated at an
+ * API boundary — and zod's recursive-schema ergonomics aren't worth the
+ * cost here.
+ */
+export interface MarkdownPageImportTaskInputItem {
+  externalId: string;
+  parentExternalId?: string;
+  collectionExternalId?: string;
+  title: string;
+  path: string;
+  markdownText: string;
+  attachmentMap: MarkdownAttachmentManifestItem[];
+  docMap: Record<string, string>;
+  children?: MarkdownPageImportTaskInputItem[];
+}
+
+/**
+ * Markdown import task input — a bootstrap row carrying only the base
+ * placeholder item (the zip's `storageKey` lives on `Import.scratch`), or a
+ * page row carrying per-document content.
+ */
+export type MarkdownImportTaskInput = (
+  | BaseImportTaskInput[number]
+  | MarkdownPageImportTaskInputItem
+)[];
+
 export type ImportTaskInput<T extends ImportableIntegrationService> =
  T extends IntegrationService.Notion
    ? NotionImportTaskInput
-    : BaseImportTaskInput;
+    : T extends IntegrationService.Markdown
+      ? MarkdownImportTaskInput
+      : BaseImportTaskInput;

 // No reason to be here except for co-location with import task input.
 export type ImportTaskOutput = {
  externalId: string;
  title: string;
-  emoji?: string;
+  icon?: string;
  author?: string;
  content: ProsemirrorDoc;
  createdAt?: Date;
@@ -97,6 +97,21 @@ export enum ImportTaskState {
  Canceled = "canceled",
 }

+/**
+ * Classifies the work an `ImportTask` row represents. Set when the task is
+ * created and used by `APIImportTask` to dispatch to the right handler.
+ *
+ * - `Bootstrap` runs once per import on a worker that owns the source
+ *   artifact (e.g. extracts a zip, discovers structure, schedules child
+ *   tasks). Subclasses without a bootstrap step never produce these.
+ * - `Page` is the per-document work that the bootstrap (or `ImportsProcessor`
+ *   for sources without a bootstrap, like Notion) fans out into.
+ */
+export enum ImportTaskPhase {
+  Bootstrap = "bootstrap",
+  Page = "page",
+}
+
 export enum MentionType {
  User = "user",
  Document = "document",
@@ -151,15 +166,17 @@ export enum IntegrationService {
  Linear = "linear",
  Figma = "figma",
  Notion = "notion",
+  Markdown = "markdown",
 }

 export type ImportableIntegrationService = Extract<
  IntegrationService,
-  IntegrationService.Notion
+  IntegrationService.Notion | IntegrationService.Markdown
 >;

 export const ImportableIntegrationService = {
  Notion: IntegrationService.Notion,
+  Markdown: IntegrationService.Markdown,
 } as const;

 export type IssueTrackerIntegrationService = Extract<