Files
Tom Moor ecafd5f32a chore: Update JSON importer to use zip streaming (#12380)
* chore: Update JSON importer to use zip streaming, new importer flow

* chore: Drop teamId from import urlId collision check and remove unused internal-id scaffolding

urlId is globally unique on Document/Collection so the team scope was wrong.
Also removes leftover internal-id generation in JSONAPIImportTask that was
never used in task input/output.

* Restore classes used upstream
2026-05-25 17:03:02 -04:00

251 lines
8.0 KiB
TypeScript

import { z } from "zod";
import type {
IntegrationService,
ProsemirrorData,
ProsemirrorDoc,
} from "./types";
import {
CollectionPermission,
type ImportableIntegrationService,
IssueTrackerIntegrationService,
} from "./types";
import { PageType } from "plugins/notion/shared/types";
/**
 * Fields shared by every import input item, whatever the source service:
 * an optional `permission` taken from `CollectionPermission`.
 */
const BaseImportInputItemSchema = z.object({
  permission: z.enum(CollectionPermission).optional(),
});

/** List of base import input items, as inferred from the base item schema. */
export type BaseImportInput = z.infer<typeof BaseImportInputItemSchema>[];
/**
 * Input item for a Notion import. Extends the base item with an optional
 * `type` (one of `PageType`) and an optional `externalId`.
 */
export const NotionImportInputItemSchema = BaseImportInputItemSchema.extend({
  type: z.enum(PageType).optional(),
  externalId: z.string().optional(),
});

/** List of Notion import input items. */
export type NotionImportInput = z.infer<typeof NotionImportInputItemSchema>[];
/**
 * Input item for a Markdown import. Extends the base item with a required
 * `externalId`.
 */
export const MarkdownImportInputItemSchema = BaseImportInputItemSchema.extend({
  externalId: z.string(),
});

/** List of Markdown import input items. */
export type MarkdownImportInput = z.infer<
  typeof MarkdownImportInputItemSchema
>[];
/**
 * Input item for a JSON import. Extends the base item with a required
 * `externalId`.
 */
export const JSONImportInputItemSchema = BaseImportInputItemSchema.extend({
  externalId: z.string(),
});

/** List of JSON import input items. */
export type JSONImportInput = z.infer<typeof JSONImportInputItemSchema>[];
/**
 * Selects the import input list type for a given importable service.
 * Notion, Markdown and JSON each resolve to their dedicated input type;
 * any other service falls back to `BaseImportInput`.
 */
export type ImportInput<T extends ImportableIntegrationService> =
  T extends IntegrationService.Notion
    ? NotionImportInput
    : T extends IntegrationService.Markdown
      ? MarkdownImportInput
      : T extends IntegrationService.JSON
        ? JSONImportInput
        : BaseImportInput;
/**
 * Fields shared by every import task input item: a required `externalId`
 * plus optional `parentExternalId` and `collectionExternalId` references.
 */
export const BaseImportTaskInputItemSchema = z.object({
  externalId: z.string(),
  parentExternalId: z.string().optional(),
  collectionExternalId: z.string().optional(),
});

/** List of base import task input items. */
export type BaseImportTaskInput = z.infer<
  typeof BaseImportTaskInputItemSchema
>[];
/**
 * Task input item for a Notion import. Extends the base task item with a
 * required `type` (one of `PageType`).
 */
export const NotionImportTaskInputItemSchema =
  BaseImportTaskInputItemSchema.extend({
    type: z.enum(PageType),
  });

/** List of Notion import task input items. */
export type NotionImportTaskInput = z.infer<
  typeof NotionImportTaskInputItemSchema
>[];
/**
 * One attachment found while bootstrapping a Markdown zip import.
 *
 * `id` is a UUID assigned up front. The same value serves as the attachment
 * node id in the per-page prosemirror output and as the id of the Attachment
 * row created during completion.
 */
export const MarkdownAttachmentManifestItemSchema = z.object({
  id: z.uuid(),
  name: z.string(),
  mimeType: z.string(),
  pathInZip: z.string(),
});

/** Manifest entry type inferred from the Markdown attachment manifest schema. */
export type MarkdownAttachmentManifestItem = z.infer<
  typeof MarkdownAttachmentManifestItemSchema
>;
/**
 * Scratch state kept for a Markdown import across its phases.
 */
export interface MarkdownImportScratch {
  /**
   * Set when the import is created; it is the only durable handle on the
   * uploaded zip.
   */
  storageKey: string;
  /**
   * Added by the bootstrap phase so the completion phase can re-download the
   * zip and create Attachment rows without walking the tree again.
   */
  manifest?: MarkdownAttachmentManifestItem[];
}
/**
 * One attachment found while bootstrapping a JSON zip import.
 *
 * `externalId` is the attachment's original id from the export. It is used to
 * rewrite `/api/attachments.redirect?id=<externalId>` references found in
 * document/collection content into new redirect URLs that target the freshly
 * created Attachment row, identified by `id`.
 */
export const JSONAttachmentManifestItemSchema = z.object({
  id: z.uuid(),
  externalId: z.string(),
  name: z.string(),
  mimeType: z.string(),
  pathInZip: z.string(),
});

/** Manifest entry type inferred from the JSON attachment manifest schema. */
export type JSONAttachmentManifestItem = z.infer<
  typeof JSONAttachmentManifestItemSchema
>;
/**
 * Scratch state kept for a JSON import across its phases.
 */
export interface JSONImportScratch {
  /**
   * Set when the import is created; it is the only durable handle on the
   * uploaded zip.
   */
  storageKey: string;
  /**
   * Added by the bootstrap phase so the completion phase can re-download the
   * zip and create Attachment rows without parsing the JSON files again.
   */
  manifest?: JSONAttachmentManifestItem[];
}
/**
 * Shape of the per-importer scratch data stored on `Import.scratch`.
 *
 * It carries state the importer needs between bootstrap and completion that
 * does not belong to any single task's input, and is cleared once the import
 * flips to `Processed`. Only the Markdown and JSON services have a scratch
 * shape; every other service resolves to `never`.
 */
export type ImportScratch<T extends ImportableIntegrationService> =
  T extends IntegrationService.Markdown
    ? MarkdownImportScratch
    : T extends IntegrationService.JSON
      ? JSONImportScratch
      : never;
/**
 * Task input for a single Markdown page. The bootstrap task produces these
 * and later MarkdownAPIImportTask runs consume them.
 *
 * This is a plain TypeScript interface, not a z.infer type, because it is
 * only used internally and never validated at an API boundary; zod's
 * recursive-schema ergonomics aren't worth the cost here.
 */
export interface MarkdownPageImportTaskInputItem {
  externalId: string;
  parentExternalId?: string;
  collectionExternalId?: string;
  title: string;
  path: string;
  markdownText: string;
  attachmentMap: MarkdownAttachmentManifestItem[];
  docMap: Record<string, string>;
  /**
   * Direct descendants of this document. Scheduling each level of the tree
   * as its own task wave keeps parent-before-child ordering during
   * persistence (child tasks' createdAt is strictly later than parents').
   */
  children?: MarkdownPageImportTaskInputItem[];
}
/**
 * Input for a Markdown import task. Each entry is either a bootstrap row
 * holding just the base placeholder item (the zip's `storageKey` is kept on
 * `Import.scratch`), or a page row holding per-document content.
 */
export type MarkdownImportTaskInput = (
  | BaseImportTaskInput[number]
  | MarkdownPageImportTaskInputItem
)[];
/**
 * Task input for a single page in the JSON importer. The bootstrap task
 * produces these once the zip has been parsed, and later JSONAPIImportTask
 * runs consume them.
 *
 * This is a plain TypeScript interface, not a z.infer type, because it is
 * only used internally and never validated at an API boundary; zod's
 * recursive-schema ergonomics aren't worth the cost here.
 */
export interface JSONPageImportTaskInputItem {
  externalId: string;
  parentExternalId?: string;
  collectionExternalId?: string;
  title: string;
  urlId?: string;
  icon?: string | null;
  color?: string | null;
  data: ProsemirrorData;
  createdById?: string;
  createdByName?: string;
  createdByEmail?: string | null;
  createdAt?: string;
  updatedAt?: string;
  publishedAt?: string | null;
  /** Maps external attachment id → manifest entry id, scoped to this doc. */
  attachmentIdMap: Record<string, string>;
  /**
   * Direct descendants of this document. Running each tree depth as its own
   * task wave keeps parent-before-child ordering during persistence (child
   * tasks' createdAt is strictly later than parents').
   */
  children?: JSONPageImportTaskInputItem[];
}
/**
 * Input for a JSON import task. Each entry is either a bootstrap row holding
 * just the base placeholder item (the zip's `storageKey` is kept on
 * `Import.scratch`), or a page row holding per-document content.
 */
export type JSONImportTaskInput = (
  | BaseImportTaskInput[number]
  | JSONPageImportTaskInputItem
)[];
/**
 * Selects the import task input list type for a given importable service.
 * Notion, Markdown and JSON each resolve to their dedicated task input type;
 * any other service falls back to `BaseImportTaskInput`.
 */
export type ImportTaskInput<T extends ImportableIntegrationService> =
  T extends IntegrationService.Notion
    ? NotionImportTaskInput
    : T extends IntegrationService.Markdown
      ? MarkdownImportTaskInput
      : T extends IntegrationService.JSON
        ? JSONImportTaskInput
        : BaseImportTaskInput;
/**
 * Output list of an import task. It is declared in this file only so that it
 * sits next to the import task input types; there is no other reason for it
 * to live here.
 */
export type ImportTaskOutput = {
  externalId: string;
  title: string;
  icon?: string | null;
  color?: string | null;
  urlId?: string;
  author?: string;
  /** Id of the original author in the source system; used to remap users. */
  createdById?: string;
  /** Email of the original author in the source system; used to remap users. */
  createdByEmail?: string | null;
  content: ProsemirrorDoc;
  createdAt?: Date;
  updatedAt?: Date;
  publishedAt?: Date | null;
}[];
/**
 * Schema for an issue source: a non-empty `id` and `name`, an `owner` object
 * with its own non-empty `id` and `name`, and a `service` taken from
 * `IssueTrackerIntegrationService`.
 */
export const IssueSource = z.object({
  id: z.string().nonempty(),
  name: z.string().nonempty(),
  owner: z.object({
    id: z.string().nonempty(),
    name: z.string().nonempty(),
  }),
  service: z.enum(IssueTrackerIntegrationService),
});

/** Issue source type inferred from the `IssueSource` schema of the same name. */
export type IssueSource = z.infer<typeof IssueSource>;