chore: Update JSON importer to use zip streaming (#12380)

* chore: Update JSON importer to use zip streaming, new importer flow

* chore: Drop teamId from import urlId collision check and remove unused internal-id scaffolding

urlId is globally unique on Document/Collection so the team scope was wrong.
Also removes leftover internal-id generation in JSONAPIImportTask that was
never used in task input/output.

* Restore classes used upstream
This commit is contained in:
Tom Moor
2026-05-25 17:03:02 -04:00
committed by GitHub
parent f9dc1a3983
commit ecafd5f32a
18 changed files with 1397 additions and 473 deletions
@@ -67,6 +67,14 @@ function DropToImport({ disabled, onSubmit, children, format }: Props) {
permission: permission ?? undefined, permission: permission ?? undefined,
} }
); );
} else if (format === FileOperationFormat.JSON) {
await imports.create(
{ service: IntegrationService.JSON },
{
attachmentId: attachment.id,
permission: permission ?? undefined,
}
);
} else { } else {
await collections.import(attachment.id, { format, permission }); await collections.import(attachment.id, { format, permission });
} }
+32 -1
View File
@@ -1,6 +1,11 @@
import type { InferAttributes, InferCreationAttributes } from "sequelize"; import type {
InferAttributes,
InferCreationAttributes,
SaveOptions,
} from "sequelize";
import { import {
AllowNull, AllowNull,
BeforeCreate,
BelongsTo, BelongsTo,
Column, Column,
DataType, DataType,
@@ -14,6 +19,7 @@ import {
import { type ImportInput, type ImportScratch } from "@shared/schema"; import { type ImportInput, type ImportScratch } from "@shared/schema";
import { ImportableIntegrationService, ImportState } from "@shared/types"; import { ImportableIntegrationService, ImportState } from "@shared/types";
import { ImportValidation } from "@shared/validations"; import { ImportValidation } from "@shared/validations";
import { UnprocessableEntityError } from "@server/errors";
import Integration from "./Integration"; import Integration from "./Integration";
import Team from "./Team"; import Team from "./Team";
import User from "./User"; import User from "./User";
@@ -91,6 +97,31 @@ class Import<T extends ImportableIntegrationService> extends ParanoidModel<
@ForeignKey(() => Team) @ForeignKey(() => Team)
@Column(DataType.UUID) @Column(DataType.UUID)
teamId: string; teamId: string;
/**
 * `BeforeCreate` hook that refuses to insert a new Import while the same team
 * still has an import in the Created, InProgress or Processed state. Keeping
 * the rule on the model means every caller that creates an Import gets the
 * same guard without repeating the count query.
 *
 * @param model The import about to be created; only its `teamId` is read.
 * @param options Save options; the count runs on `options.transaction`.
 * @throws The error built by `UnprocessableEntityError` when a matching
 * import already exists.
 */
@BeforeCreate
// oxlint-disable-next-line @typescript-eslint/no-explicit-any
static async checkInProgress(model: Import<any>, options: SaveOptions) {
  const { teamId } = model;
  const { transaction } = options;
  const unfinishedStates = [
    ImportState.Created,
    ImportState.InProgress,
    ImportState.Processed,
  ];

  const unfinishedCount = await this.count({
    where: { teamId, state: unfinishedStates },
    transaction,
  });

  if (unfinishedCount > 0) {
    throw UnprocessableEntityError("An import is already in progress");
  }
}
} }
export default Import; export default Import;
@@ -4,7 +4,6 @@ import type { Event as TEvent, FileOperationEvent } from "@server/types";
import ExportHTMLZipTask from "../tasks/ExportHTMLZipTask"; import ExportHTMLZipTask from "../tasks/ExportHTMLZipTask";
import ExportJSONTask from "../tasks/ExportJSONTask"; import ExportJSONTask from "../tasks/ExportJSONTask";
import ExportMarkdownZipTask from "../tasks/ExportMarkdownZipTask"; import ExportMarkdownZipTask from "../tasks/ExportMarkdownZipTask";
import ImportJSONTask from "../tasks/ImportJSONTask";
import BaseProcessor from "./BaseProcessor"; import BaseProcessor from "./BaseProcessor";
export default class FileOperationCreatedProcessor extends BaseProcessor { export default class FileOperationCreatedProcessor extends BaseProcessor {
@@ -18,20 +17,9 @@ export default class FileOperationCreatedProcessor extends BaseProcessor {
} }
); );
// map file operation type and format to the appropriate task. Markdown // Imports no longer flow through FileOperation — both JSON and Markdown
// zip imports flow through the API-import pipeline (`imports.create` → // zip imports run through the API-import pipeline (`imports.create` →
// MarkdownAPIImportTask) and never reach this dispatcher. // {Markdown,JSON}APIImportTask). This dispatcher only handles exports.
if (fileOperation.type === FileOperationType.Import) {
switch (fileOperation.format) {
case FileOperationFormat.JSON:
await new ImportJSONTask().schedule({
fileOperationId: event.modelId,
});
break;
default:
}
}
if (fileOperation.type === FileOperationType.Export) { if (fileOperation.type === FileOperationType.Export) {
switch (fileOperation.format) { switch (fileOperation.format) {
case FileOperationFormat.HTMLZip: case FileOperationFormat.HTMLZip:
+144 -5
View File
@@ -38,6 +38,7 @@ import { DocumentHelper } from "@server/models/helpers/DocumentHelper";
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper"; import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
import { sequelize } from "@server/storage/database"; import { sequelize } from "@server/storage/database";
import type { Event, ImportEvent } from "@server/types"; import type { Event, ImportEvent } from "@server/types";
import { generateUrlId } from "@server/utils/url";
import BaseProcessor from "./BaseProcessor"; import BaseProcessor from "./BaseProcessor";
export const PagePerImportTask = 3; export const PagePerImportTask = 3;
@@ -285,6 +286,9 @@ export default abstract class ImportsProcessor<
const createdCollections: Collection[] = []; const createdCollections: Collection[] = [];
// External id to internal model id. // External id to internal model id.
const idMap: Record<string, string> = {}; const idMap: Record<string, string> = {};
// Cache of resolved external author → internal user id (or undefined when
// no match). Reused across every output in the import.
const userIdCache = new Map<string, string | undefined>();
// These will be imported as collections. Widened to the base input shape // These will be imported as collections. Widened to the base input shape
// because the abstract class has no narrowed view of T. // because the abstract class has no narrowed view of T.
const importInput = keyBy( const importInput = keyBy(
@@ -358,6 +362,14 @@ export default abstract class ImportsProcessor<
teamId: importModel.teamId, teamId: importModel.teamId,
}); });
const resolvedCreatedById =
(await this.resolveExternalUserId(
output,
importModel.teamId,
userIdCache,
transaction
)) ?? importModel.createdById;
if (collectionItem) { if (collectionItem) {
// imported collection will be placed in the beginning. // imported collection will be placed in the beginning.
collectionIdx = fractionalIndex(null, collectionIdx); collectionIdx = fractionalIndex(null, collectionIdx);
@@ -376,16 +388,24 @@ export default abstract class ImportsProcessor<
createdByName: output.author, createdByName: output.author,
}; };
const urlId = await this.preserveCollectionUrlId(
output.urlId,
transaction
);
const collection = Collection.build({ const collection = Collection.build({
id: internalId, id: internalId,
urlId,
name: output.title, name: output.title,
icon: output.icon ?? "collection", icon: output.icon ?? "collection",
color: output.icon ? undefined : randomElement(colorPalette), color:
output.color ??
(output.icon ? undefined : randomElement(colorPalette)),
content: transformedContent, content: transformedContent,
description: truncate(description, { description: truncate(description, {
length: CollectionValidation.maxDescriptionLength, length: CollectionValidation.maxDescriptionLength,
}), }),
createdById: importModel.createdById, createdById: resolvedCreatedById,
teamId: importModel.createdBy.teamId, teamId: importModel.createdBy.teamId,
apiImportId: importModel.id, apiImportId: importModel.id,
index: collectionIdx, index: collectionIdx,
@@ -419,17 +439,24 @@ export default abstract class ImportsProcessor<
const isRootDocument = const isRootDocument =
!parentExternalId || !!importInput[parentExternalId]; !parentExternalId || !!importInput[parentExternalId];
const urlId = await this.preserveDocumentUrlId(
output.urlId,
transaction
);
const defaults = { const defaults = {
title: output.title, title: output.title,
urlId,
icon: output.icon, icon: output.icon,
color: output.color,
content: transformedContent, content: transformedContent,
text: await DocumentHelper.toMarkdown(transformedContent, { text: await DocumentHelper.toMarkdown(transformedContent, {
includeTitle: false, includeTitle: false,
}), }),
collectionId: collectionInternalId, collectionId: collectionInternalId,
parentDocumentId: isRootDocument ? undefined : parentInternalId, parentDocumentId: isRootDocument ? undefined : parentInternalId,
createdById: importModel.createdById, createdById: resolvedCreatedById,
lastModifiedById: importModel.createdById, lastModifiedById: resolvedCreatedById,
teamId: importModel.createdBy.teamId, teamId: importModel.createdBy.teamId,
apiImportId: importModel.id, apiImportId: importModel.id,
sourceMetadata: { sourceMetadata: {
@@ -439,7 +466,11 @@ export default abstract class ImportsProcessor<
}, },
createdAt: output.createdAt ?? now, createdAt: output.createdAt ?? now,
updatedAt: output.updatedAt ?? now, updatedAt: output.updatedAt ?? now,
publishedAt: output.updatedAt ?? output.createdAt ?? now, publishedAt:
output.publishedAt ??
output.updatedAt ??
output.createdAt ??
now,
}; };
try { try {
@@ -612,6 +643,114 @@ export default abstract class ImportsProcessor<
return idMap[externalId]; return idMap[externalId];
} }
/**
 * Resolves the original author of an imported item to a user in the target
 * team. Tries `createdById` first, then falls back to `createdByEmail`. Both
 * hits and misses are cached, but a cached id *miss* never short-circuits the
 * email fallback: two items can share a `createdById` while only one of them
 * carries an email, and that email must still get a chance to match.
 * Returns `undefined` when no match is found so the caller can fall back to
 * the importing user.
 *
 * @param output The ImportTaskOutput entry carrying optional original-author
 * fields from the source.
 * @param teamId Team to scope the lookup to.
 * @param cache Map reused across calls within one persistence pass. Keys are
 * `id:<createdById>` and `email:<normalized email>`.
 * @param transaction Active sequelize transaction.
 * @returns The matched internal user id, or undefined.
 */
private async resolveExternalUserId(
  output: { createdById?: string; createdByEmail?: string | null },
  teamId: string,
  cache: Map<string, string | undefined>,
  transaction: Transaction
): Promise<string | undefined> {
  const { createdById } = output;

  if (createdById) {
    const idCacheKey = `id:${createdById}`;
    const cachedId = cache.get(idCacheKey);
    if (cachedId) {
      return cachedId;
    }

    // Only query when this id has never been looked up. A remembered miss
    // skips the query but still falls through to the email lookup below.
    if (!cache.has(idCacheKey)) {
      const user = await User.findOne({
        where: { id: createdById, teamId },
        transaction,
      });
      if (user) {
        cache.set(idCacheKey, user.id);
        return user.id;
      }
      cache.set(idCacheKey, undefined);
    }
  }

  // Normalize before use; a whitespace-only value is treated as "no email".
  const email = output.createdByEmail?.toLowerCase().trim();
  if (email) {
    const emailCacheKey = `email:${email}`;
    let resolvedId: string | undefined;

    if (cache.has(emailCacheKey)) {
      resolvedId = cache.get(emailCacheKey);
    } else {
      const user = await User.findOne({
        where: { email, teamId },
        transaction,
      });
      resolvedId = user?.id;
      cache.set(emailCacheKey, resolvedId);
    }

    // Promote an email hit to the id key so later items with the same
    // external id resolve without touching the email path.
    if (resolvedId && createdById) {
      cache.set(`id:${createdById}`, resolvedId);
    }
    return resolvedId;
  }

  return undefined;
}
/**
 * Decides which urlId an imported document should get. A urlId supplied by
 * the export is kept unless a Document with that urlId already exists (the
 * lookup is unscoped and includes soft-deleted rows), in which case a newly
 * generated one is returned instead.
 *
 * @param sourceUrlId The urlId carried by the imported document, if any.
 * @param transaction Active sequelize transaction used for the lookup.
 * @returns The urlId to assign, or undefined when none was supplied so the
 * model default applies.
 */
private async preserveDocumentUrlId(
  sourceUrlId: string | undefined,
  transaction: Transaction
): Promise<string | undefined> {
  if (sourceUrlId) {
    const clash = await Document.unscoped().findOne({
      attributes: ["id"],
      paranoid: false,
      where: { urlId: sourceUrlId },
      transaction,
    });

    if (clash) {
      return generateUrlId();
    }
    return sourceUrlId;
  }

  return undefined;
}
/**
 * Decides which urlId an imported collection should get. A urlId supplied by
 * the export is kept unless a Collection with that urlId already exists (the
 * lookup is unscoped and includes soft-deleted rows), in which case a newly
 * generated one is returned instead.
 *
 * @param sourceUrlId The urlId carried by the imported collection, if any.
 * @param transaction Active sequelize transaction used for the lookup.
 * @returns The urlId to assign, or undefined when none was supplied so the
 * model default applies.
 */
private async preserveCollectionUrlId(
  sourceUrlId: string | undefined,
  transaction: Transaction
): Promise<string | undefined> {
  if (sourceUrlId) {
    const clash = await Collection.unscoped().findOne({
      attributes: ["id"],
      paranoid: false,
      where: { urlId: sourceUrlId },
      transaction,
    });

    if (clash) {
      return generateUrlId();
    }
    return sourceUrlId;
  }

  return undefined;
}
/** /**
* Determine whether this import can be processed by this processor. * Determine whether this import can be processed by this processor.
* *
@@ -0,0 +1,35 @@
import type { Transaction } from "sequelize";
import type { ImportTaskInput } from "@shared/schema";
import { ImportTaskPhase, IntegrationService } from "@shared/types";
import type { Import, ImportTask } from "@server/models";
import JSONAPIImportTask from "../tasks/JSONAPIImportTask";
import ImportsProcessor from "./ImportsProcessor";
/**
 * Imports processor for the JSON service. It starts every import in the
 * Bootstrap phase with a single task input and schedules JSONAPIImportTask
 * for each import task.
 */
export default class JSONImportsProcessor extends ImportsProcessor<IntegrationService.JSON> {
  /**
   * @param importModel The import being considered.
   * @returns true only when the import's service is JSON.
   */
  protected canProcess(importModel: Import<IntegrationService.JSON>): boolean {
    return importModel.service === IntegrationService.JSON;
  }

  /**
   * @returns The phase assigned to the first task of a JSON import.
   */
  protected getInitialPhase(): ImportTaskPhase {
    return ImportTaskPhase.Bootstrap;
  }

  /**
   * Builds the input for the bootstrap task: one entry carrying the
   * externalId of the import's first input item.
   *
   * @param importModel The import to build task input for.
   * @param _transaction Unused.
   * @returns A single-element task input.
   * @throws Error when `scratch.storageKey` is missing or the import has no
   * input entry to take the externalId from.
   */
  protected async buildTasksInput(
    importModel: Import<IntegrationService.JSON>,
    _transaction: Transaction
  ): Promise<ImportTaskInput<IntegrationService.JSON>> {
    if (!importModel.scratch?.storageKey) {
      throw new Error(
        "JSON import is missing scratch.storageKey for the bootstrap phase"
      );
    }

    // Fail with a descriptive message instead of a TypeError on `undefined`
    // when the import was created with an empty input list.
    const firstInput = importModel.input[0];
    if (!firstInput) {
      throw new Error(
        "JSON import is missing an input entry for the bootstrap phase"
      );
    }

    return [{ externalId: firstInput.externalId }];
  }

  /**
   * Queues a JSONAPIImportTask for the given import task.
   *
   * @param importTask The import task whose id is passed to the job.
   */
  protected async scheduleTask(
    importTask: ImportTask<IntegrationService.JSON>
  ): Promise<void> {
    await new JSONAPIImportTask().schedule({ importTaskId: importTask.id });
  }
}
-155
View File
@@ -1,155 +0,0 @@
import path from "node:path";
import { FileOperation, User } from "@server/models";
import {
buildFileOperation,
buildUser,
buildTeam,
buildAdmin,
} from "@server/test/factories";
import ImportJSONTask from "./ImportJSONTask";
// The fixture has these values for both documents:
// createdById: "ccec260a-e060-4925-ade8-17cfabaf2cac"
// createdByEmail: "hmac.devo@gmail.com"
const fixtureCreatedById = "ccec260a-e060-4925-ade8-17cfabaf2cac";
const fixtureCreatedByEmail = "hmac.devo@gmail.com";
const fixturePath = path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"outline-json.zip"
);
/**
 * Points the given file operation at the JSON fixture zip: its `handle`
 * getter is replaced with one returning the fixture path and a no-op
 * cleanup, and `FileOperation.findByPk` is stubbed to resolve to it.
 *
 * @param fileOperation The file operation to redirect to the fixture.
 */
function mockHandle(fileOperation: FileOperation) {
  const fixtureHandle = () => ({
    path: fixturePath,
    cleanup: async () => {},
  });

  Object.defineProperty(fileOperation, "handle", { get: fixtureHandle });
  vi.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
}
describe("ImportJSONTask", () => {
  // Redirects the file operation to the fixture zip and runs the task on it.
  const performImport = async (fileOperation: FileOperation) => {
    mockHandle(fileOperation);
    return new ImportJSONTask().perform({
      fileOperationId: fileOperation.id,
    });
  };

  it("should import the documents, attachments", async () => {
    const fileOperation = await buildFileOperation();

    const response = await performImport(fileOperation);

    expect(response.collections.size).toEqual(1);
    expect(response.documents.size).toEqual(2);
    expect(response.attachments.size).toEqual(1);
  });

  describe("user mapping", () => {
    it("should map createdById to an existing user by ID", async () => {
      // A user with the fixture's createdById may be left over from an
      // earlier run; reuse it (and its team) when present, else create one.
      const existingAuthor = await User.findByPk(fixtureCreatedById);
      const teamId = existingAuthor?.teamId ?? (await buildTeam()).id;
      const originalAuthor =
        existingAuthor ??
        (await buildUser({
          id: fixtureCreatedById,
          teamId,
        }));

      const admin = await buildAdmin({ teamId });
      const fileOperation = await buildFileOperation({
        userId: admin.id,
        teamId,
      });

      const response = await performImport(fileOperation);

      for (const document of response.documents.values()) {
        expect(document.createdById).toEqual(originalAuthor.id);
        expect(document.lastModifiedById).toEqual(originalAuthor.id);
      }
    });

    it("should fall back to email matching when ID does not match", async () => {
      const team = await buildTeam();
      // Same email as the fixture author, but a freshly generated id.
      const originalAuthor = await buildUser({
        teamId: team.id,
        email: fixtureCreatedByEmail,
      });
      const admin = await buildAdmin({ teamId: team.id });
      const fileOperation = await buildFileOperation({
        userId: admin.id,
        teamId: team.id,
      });

      const response = await performImport(fileOperation);

      for (const document of response.documents.values()) {
        expect(document.createdById).toEqual(originalAuthor.id);
        expect(document.lastModifiedById).toEqual(originalAuthor.id);
      }
    });

    it("should fall back to importing user when no match is found", async () => {
      const team = await buildTeam();
      const admin = await buildAdmin({ teamId: team.id });
      const fileOperation = await buildFileOperation({
        userId: admin.id,
        teamId: team.id,
      });

      const response = await performImport(fileOperation);

      for (const document of response.documents.values()) {
        expect(document.createdById).toEqual(admin.id);
        expect(document.lastModifiedById).toEqual(admin.id);
      }
    });

    it("should not match users from a different team", async () => {
      const team = await buildTeam();
      const otherTeam = await buildTeam();
      // The only user with the fixture email lives in another team.
      await buildUser({
        teamId: otherTeam.id,
        email: fixtureCreatedByEmail,
      });
      const admin = await buildAdmin({ teamId: team.id });
      const fileOperation = await buildFileOperation({
        userId: admin.id,
        teamId: team.id,
      });

      const response = await performImport(fileOperation);

      for (const document of response.documents.values()) {
        expect(document.createdById).toEqual(admin.id);
      }
    });
  });
});
-246
View File
@@ -1,246 +0,0 @@
import path from "node:path";
import fs from "fs-extra";
import { find } from "es-toolkit/compat";
import mime from "mime-types";
import { Fragment, Node } from "prosemirror-model";
import { randomUUID } from "node:crypto";
import type { ProsemirrorData } from "@shared/types";
import { schema, serializer } from "@server/editor";
import Logger from "@server/logging/Logger";
import type { FileOperation } from "@server/models";
import { Attachment } from "@server/models";
import type {
AttachmentJSONExport,
CollectionJSONExport,
DocumentJSONExport,
JSONExportMetadata,
} from "@server/types";
import type { FileTreeNode } from "@server/utils/ImportHelper";
import ImportHelper from "@server/utils/ImportHelper";
import type { StructuredImportData } from "./ImportTask";
import ImportTask from "./ImportTask";
/**
 * Import task for JSON export archives. `parseData` builds a file tree from
 * the extracted directory and turns the root-level collection JSON files
 * into the collections, documents and attachments of a StructuredImportData.
 */
export default class ImportJSONTask extends ImportTask {
  /**
   * Entry point: builds the file tree for the directory and parses it.
   *
   * @param dirPath Directory holding the extracted archive.
   * @param _ The file operation (unused).
   * @returns The structured data to import.
   * @throws Error when no file tree could be built from the directory.
   */
  public async parseData(
    dirPath: string,
    _: FileOperation
  ): Promise<StructuredImportData> {
    const tree = await ImportHelper.toFileTree(dirPath);
    if (!tree) {
      throw new Error("Could not find valid content in zip file");
    }

    return this.parseFileTree(tree.children);
  }

  /**
   * Converts the file structure from zipAsFileTree into documents,
   * collections, and attachments.
   *
   * @param tree An array of FileTreeNode representing root files in the zip
   * @returns A StructuredImportData object
   * @throws Error when a JSON file cannot be parsed, no root path can be
   * determined, or an attachment key points outside the root path.
   */
  private async parseFileTree(
    tree: FileTreeNode[]
  ): Promise<StructuredImportData> {
    let rootPath = "";
    const output: StructuredImportData = {
      collections: [],
      documents: [],
      attachments: [],
    };

    // External document id → generated internal id, for every document seen
    // so far. Replaces a linear scan of output.documents per parent lookup.
    const documentIdByExternalId = new Map<string, string>();

    // Match on the file name only: node paths carry their directory (the
    // root path is derived from them), and a collection file whose name
    // merely ends in "metadata.json" must not be mistaken for the metadata.
    const isMetadataFile = (filePath: string): boolean =>
      path.basename(filePath) === "metadata.json";

    // The catch variable is `unknown` under strict settings; only read
    // `.message` from real Error instances.
    const describeError = (err: unknown): string =>
      err instanceof Error ? err.message : String(err);

    // Load metadata
    let metadata: JSONExportMetadata | undefined = undefined;
    for (const node of tree) {
      if (!rootPath) {
        rootPath = path.dirname(node.path);
      }
      if (isMetadataFile(node.path)) {
        try {
          metadata = JSON.parse(await fs.readFile(node.path, "utf8"));
        } catch (err) {
          throw new Error(
            `Could not parse metadata.json. ${describeError(err)}`
          );
        }
      }
    }

    if (!rootPath) {
      throw new Error("Could not find root path");
    }

    Logger.debug("task", "Importing JSON metadata", { metadata });

    function mapDocuments(
      documents: { [id: string]: DocumentJSONExport },
      collectionId: string
    ) {
      // First pass: allocate an id for every document so a child listed
      // before its parent can still resolve the parent's id. The first id
      // registered for an external id wins.
      const entries = Object.values(documents).map((node) => {
        const id = randomUUID();
        if (!documentIdByExternalId.has(node.id)) {
          documentIdByExternalId.set(node.id, id);
        }
        return { node, id };
      });

      for (const { node, id } of entries) {
        // A document naming itself as parent is left unresolved.
        const parentId =
          node.parentDocumentId && node.parentDocumentId !== node.id
            ? documentIdByExternalId.get(node.parentDocumentId)
            : undefined;

        output.documents.push({
          ...node,
          path: "",
          text: "",
          data: node.data,
          icon: node.icon ?? node.emoji,
          color: node.color,
          createdAt: node.createdAt ? new Date(node.createdAt) : undefined,
          updatedAt: node.updatedAt ? new Date(node.updatedAt) : undefined,
          publishedAt: node.publishedAt ? new Date(node.publishedAt) : null,
          collectionId,
          externalId: node.id,
          mimeType: "application/json",
          parentDocumentId: node.parentDocumentId ? parentId : null,
          id,
        });
      }
    }

    function mapAttachments(attachments: {
      [id: string]: AttachmentJSONExport;
    }) {
      Object.values(attachments).forEach((node) => {
        const id = randomUUID();
        const mimeType = mime.lookup(node.key) || "application/octet-stream";
        const filePath = path.join(rootPath, node.key);

        // Block path traversal attempts
        if (node.key.includes("..")) {
          throw new Error(`Invalid attachment path: ${node.key}`);
        }

        const resolvedPath = path.resolve(filePath);
        if (!resolvedPath.startsWith(path.resolve(rootPath) + path.sep)) {
          throw new Error(`Invalid attachment path: ${node.key}`);
        }

        output.attachments.push({
          id,
          name: node.name,
          buffer: () => fs.readFile(filePath),
          mimeType,
          path: node.key,
          externalId: node.id,
        });
      });
    }

    // All nodes in the root level should be collections as JSON + metadata
    for (const node of tree) {
      if (node.children.length > 0 || isMetadataFile(node.path)) {
        continue;
      }

      let item: CollectionJSONExport;
      try {
        item = JSON.parse(await fs.readFile(node.path, "utf8"));
      } catch (err) {
        throw new Error(`Could not parse ${node.path}. ${describeError(err)}`);
      }

      const collectionId = randomUUID();

      output.collections.push({
        ...item.collection,
        id: collectionId,
        externalId: item.collection.id,
      });

      if (Object.values(item.documents).length) {
        mapDocuments(item.documents, collectionId);
      }

      if (Object.values(item.attachments).length) {
        mapAttachments(item.attachments);
      }
    }

    // Check all of the attachments we've created against urls and
    // replace them with the correct redirect urls before continuing.
    if (output.attachments.length) {
      this.replaceAttachmentURLs(output);
    }

    return output;
  }

  /**
   * Rewrites attachment, image and video nodes in every collection and
   * document so redirect URLs that reference an exported attachment id point
   * at the newly generated attachment id. Mutates `output` in place and
   * re-serializes the affected description/text fields.
   *
   * @param output The structured import data to update.
   */
  private replaceAttachmentURLs(output: StructuredImportData) {
    const attachmentTypes = ["attachment", "image", "video"];
    // The dot is escaped so only the literal "attachments.redirect" matches.
    const urlRegex = /\/api\/attachments\.redirect\?id=(.+)/;

    const attachmentExternalIdMap = output.attachments.reduce(
      (obj, attachment) => {
        if (attachment.externalId) {
          obj[attachment.externalId] = attachment;
        }
        return obj;
      },
      {} as Record<string, StructuredImportData["attachments"][number]>
    );

    const getRedirectPath = (existingPath?: string): string | undefined => {
      if (!existingPath) {
        return;
      }

      const match = existingPath.match(urlRegex);
      if (!match) {
        return existingPath;
      }

      const attachment = attachmentExternalIdMap[match[1]];
      // maintain the existing behaviour of using existingPath when attachment id is not present.
      return attachment
        ? Attachment.getRedirectUrl(attachment.id)
        : existingPath;
    };

    const transformAttachmentNode = (node: Node): Node => {
      const json = node.toJSON() as ProsemirrorData;
      const attrs = json.attrs ?? {};

      if (node.type.name === "attachment") {
        // attachment node uses 'href' attribute
        attrs.href = getRedirectPath(attrs.href as string);
      } else if (node.type.name === "image" || node.type.name === "video") {
        // image & video nodes use 'src' attribute
        attrs.src = getRedirectPath(attrs.src as string);
      }

      json.attrs = attrs;
      return Node.fromJSON(schema, json);
    };

    // Walks a fragment recursively, rewriting attachment-like nodes and
    // copying every other node with its transformed children.
    const transformFragment = (fragment: Fragment): Fragment => {
      const nodes: Node[] = [];

      fragment.forEach((node) => {
        nodes.push(
          attachmentTypes.includes(node.type.name)
            ? transformAttachmentNode(node)
            : node.copy(transformFragment(node.content))
        );
      });

      return Fragment.fromArray(nodes);
    };

    for (const collection of output.collections) {
      const node = Node.fromJSON(schema, collection.data);
      const transformedNode = node.copy(transformFragment(node.content));
      collection.description = serializer.serialize(transformedNode);
      collection.data = transformedNode.toJSON();
    }

    for (const document of output.documents) {
      const node = Node.fromJSON(schema, document.data);
      const transformedNode = node.copy(transformFragment(node.content));
      document.data = transformedNode.toJSON();
      document.text = serializer.serialize(transformedNode);
    }
  }
}
@@ -0,0 +1,458 @@
import { randomUUID } from "node:crypto";
import fs from "fs-extra";
import JSZip from "jszip";
import tmp from "tmp";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
Attachment,
Collection,
Document,
ImportTask,
User,
} from "@server/models";
import FileStorage from "@server/storage/files";
import {
CollectionPermission,
ImportTaskPhase,
ImportTaskState,
IntegrationService,
} from "@shared/types";
import {
buildAdmin,
buildImport,
buildTeam,
buildUser,
} from "@server/test/factories";
import JSONImportsProcessor from "../processors/JSONImportsProcessor";
import JSONAPIImportTask, {
rewriteAttachmentReferences,
} from "./JSONAPIImportTask";
// Fixed external IDs and email used across the user-mapping tests — these
// are the values written into every freshly generated zip.
const FIXTURE_USER_ID = "ccec260a-e060-4925-ade8-17cfabaf2cac";
const FIXTURE_USER_EMAIL = "hmac.devo@gmail.com";
/** A generated JSON export zip on disk, plus the means to remove it. */
interface BuiltZip {
// Path of the temporary zip file.
filePath: string;
// Removes the temporary zip file.
cleanup: () => Promise<void>;
}
/**
 * Builds a self-contained JSON export zip in a tmp file. Each call produces
 * fresh urlIds and document ids so concurrent tests (across files) never
 * collide on `urlId` uniqueness. The structure matches what ExportJSONTask
 * produces: a single collection JSON + metadata.json at the zip root,
 * documents carrying source user attribution, plus one referenced
 * attachment.
 *
 * @returns The path of the written zip and a cleanup callback that removes
 * it, ignoring removal errors.
 */
async function buildJSONExportZip(): Promise<BuiltZip> {
  const collectionExternalId = randomUUID();
  const collectionUrlId = randomUrlId();
  const documentOneId = randomUUID();
  const documentOneUrlId = randomUrlId();
  const documentTwoId = randomUUID();
  const documentTwoUrlId = randomUrlId();
  const attachmentExternalId = randomUUID();
  const attachmentKey = `uploads/${FIXTURE_USER_ID}/${attachmentExternalId}/pikachu.jpg`;

  const collectionExport = {
    collection: {
      id: collectionExternalId,
      urlId: collectionUrlId,
      name: "Test JSON",
      data: { type: "doc", content: [{ type: "paragraph" }] },
      sort: { field: "index", direction: "asc" },
      icon: "beaker",
      color: "#FF825C",
      permission: null,
      documentStructure: [
        { id: documentOneId, title: "Document 1", children: [] },
        { id: documentTwoId, title: "Document 2", children: [] },
      ],
    },
    documents: {
      [documentOneId]: {
        id: documentOneId,
        urlId: documentOneUrlId,
        title: "Document 1",
        icon: null,
        color: null,
        data: {
          type: "doc",
          content: [
            {
              type: "paragraph",
              content: [{ type: "text", text: "Some random text" }],
            },
            {
              type: "paragraph",
              content: [
                {
                  type: "image",
                  attrs: {
                    src: `/api/attachments.redirect?id=${attachmentExternalId}`,
                    alt: null,
                    layoutClass: null,
                    title: null,
                  },
                },
              ],
            },
          ],
        },
        createdById: FIXTURE_USER_ID,
        createdByName: "hmac.devo",
        createdByEmail: FIXTURE_USER_EMAIL,
        createdAt: "2024-07-18T18:03:41.622Z",
        updatedAt: "2024-07-18T18:04:46.133Z",
        publishedAt: "2024-07-18T18:03:45.710Z",
        fullWidth: false,
        parentDocumentId: null,
      },
      [documentTwoId]: {
        id: documentTwoId,
        urlId: documentTwoUrlId,
        title: "Document 2",
        icon: null,
        color: null,
        data: {
          type: "doc",
          content: [
            {
              type: "paragraph",
              content: [{ type: "text", text: "Doc two body" }],
            },
          ],
        },
        createdById: FIXTURE_USER_ID,
        createdByName: "hmac.devo",
        createdByEmail: FIXTURE_USER_EMAIL,
        createdAt: "2024-07-18T18:03:41.622Z",
        updatedAt: "2024-07-18T18:04:46.133Z",
        publishedAt: "2024-07-18T18:03:45.710Z",
        fullWidth: false,
        parentDocumentId: null,
      },
    },
    attachments: {
      [attachmentExternalId]: {
        id: attachmentExternalId,
        documentId: documentOneId,
        contentType: "image/jpeg",
        name: "pikachu.jpg",
        size: 6,
        key: attachmentKey,
      },
    },
  };

  const metadata = {
    exportVersion: 1,
    version: "0.78.0-0",
    createdAt: "2024-07-18T18:18:14.221Z",
    createdById: FIXTURE_USER_ID,
    createdByEmail: FIXTURE_USER_EMAIL,
  };

  const zip = new JSZip();
  zip.file("metadata.json", JSON.stringify(metadata));
  zip.file("Test JSON.json", JSON.stringify(collectionExport));
  zip.file(attachmentKey, Buffer.from("pixels"));

  const buffer = await zip.generateAsync({ type: "nodebuffer" });

  // Only the path is needed. Without `discardDescriptor`, tmp keeps the file
  // descriptor it opened, which would leak one descriptor per generated zip.
  const filePath: string = await new Promise((resolve, reject) => {
    tmp.file({ postfix: ".zip", discardDescriptor: true }, (err, p) =>
      err ? reject(err) : resolve(p)
    );
  });
  await fs.writeFile(filePath, buffer);

  return {
    filePath,
    cleanup: async () => {
      await fs.rm(filePath, { force: true }).catch(() => {});
    },
  };
}
/**
 * Produces a random 10-character identifier for test fixtures, taken from
 * the base-36 digits of `Math.random()` and right-padded with "x" when the
 * fraction is shorter than 10 digits.
 *
 * @returns A 10-character string.
 */
function randomUrlId(): string {
  const base36 = Math.random().toString(36);
  // Drop the leading "0." and keep at most ten digits of the fraction.
  const fraction = base36.substring(2, 12);
  const padded = fraction.padEnd(10, "x");
  return padded.substring(0, 10);
}
/**
 * Runs a JSON import from start to finish against a zip on disk. File
 * storage and job scheduling are stubbed, so the bootstrap task and every
 * task it leaves in the Created state are performed inline, after which the
 * `imports.processed` event is handed to JSONImportsProcessor.
 *
 * @param opts.teamId Team the import belongs to.
 * @param opts.createdById User creating the import.
 * @param opts.zipPath Path of the zip returned by the stubbed file storage.
 * @returns The id of the created import.
 */
async function runImport(opts: {
  teamId: string;
  createdById: string;
  zipPath: string;
}): Promise<{ importId: string }> {
  const { teamId, createdById, zipPath } = opts;

  vi.spyOn(FileStorage, "getFileHandle").mockResolvedValue({
    path: zipPath,
    cleanup: async () => {},
  });

  const importModel = await buildImport({
    teamId,
    createdById,
    service: IntegrationService.JSON,
    integrationId: null,
    input: [
      { externalId: randomUUID(), permission: CollectionPermission.Read },
    ],
    scratch: { storageKey: "fixture-key" },
  });
  const importId = importModel.id;

  // Seed the bootstrap row that JSONImportsProcessor would have created.
  const firstInput = importModel.input[0] as { externalId: string };
  const bootstrapTask = await ImportTask.create<
    ImportTask<IntegrationService.JSON>
  >({
    state: ImportTaskState.Created,
    phase: ImportTaskPhase.Bootstrap,
    input: [{ externalId: firstInput.externalId }],
    importId,
  } as Parameters<typeof ImportTask.create>[0]);

  // schedule() would normally queue follow-on work. It is stubbed, and the
  // loop below performs each Created task instead, oldest first.
  vi.spyOn(JSONAPIImportTask.prototype, "schedule").mockResolvedValue(
    undefined as never
  );

  const runTask = (importTaskId: string) =>
    new JSONAPIImportTask().perform({ importTaskId });
  const oldestCreatedTask = () =>
    ImportTask.findOne<ImportTask<IntegrationService.JSON>>({
      where: { state: ImportTaskState.Created, importId },
      order: [["createdAt", "ASC"]],
    });

  await runTask(bootstrapTask.id);

  for (
    let pending = await oldestCreatedTask();
    pending;
    pending = await oldestCreatedTask()
  ) {
    await runTask(pending.id);
  }

  // Once all per-task work is done, APIImportTask transitions the Import to
  // Processed and fires the persistence pass via JSONImportsProcessor.
  await runTask(bootstrapTask.id);

  await new JSONImportsProcessor().perform({
    name: "imports.processed",
    modelId: importId,
    teamId,
    actorId: createdById,
    ip: "127.0.0.1",
    changes: { attributes: {}, previous: {} },
  });

  return { importId };
}
// Integration tests for the JSON importer. Every case builds an export zip,
// runs it through runImport(), and then inspects the rows tagged with the
// returned import id.
describe("JSONAPIImportTask", () => {
let zip: BuiltZip;
// A fresh zip is built before each test and cleaned up afterwards.
beforeEach(async () => {
zip = await buildJSONExportZip();
});
afterEach(async () => {
await zip.cleanup();
});
// Smoke test: expects one collection, two documents and at least one
// attachment after the import.
it("imports collections, documents and attachments from the fixture", async () => {
const admin = await buildAdmin();
const { importId } = await runImport({
teamId: admin.teamId,
createdById: admin.id,
zipPath: zip.filePath,
});
const collections = await Collection.findAll({
where: { apiImportId: importId },
});
const documents = await Document.findAll({
where: { apiImportId: importId },
});
// Attachments are looked up by team rather than by import id.
const attachments = await Attachment.findAll({
where: { teamId: admin.teamId },
});
expect(collections.length).toBe(1);
expect(documents.length).toBe(2);
expect(attachments.length).toBeGreaterThanOrEqual(1);
});
// Covers how the author recorded in the export is resolved to a local user.
describe("user mapping", () => {
it("maps createdById to an existing user by ID", async () => {
// Reuse the user with FIXTURE_USER_ID if one already exists; otherwise
// create it inside a newly built team.
let originalAuthor = await User.findByPk(FIXTURE_USER_ID);
const teamId = originalAuthor?.teamId ?? (await buildTeam()).id;
if (!originalAuthor) {
originalAuthor = await buildUser({ id: FIXTURE_USER_ID, teamId });
}
// The importing admin lives in the same team as that user.
const admin = await buildAdmin({ teamId });
const { importId } = await runImport({
teamId,
createdById: admin.id,
zipPath: zip.filePath,
});
const documents = await Document.findAll({
where: { apiImportId: importId },
});
expect(documents.length).toBe(2);
// Documents must be attributed to the matched user, not the importer.
for (const document of documents) {
expect(document.createdById).toBe(originalAuthor.id);
expect(document.lastModifiedById).toBe(originalAuthor.id);
}
});
it("falls back to email matching when ID does not match", async () => {
const team = await buildTeam();
// This user gets a generated id, so only FIXTURE_USER_EMAIL can link it to
// the export (presumably the fixture author's email — confirm in the fixture).
const originalAuthor = await buildUser({
teamId: team.id,
email: FIXTURE_USER_EMAIL,
});
const admin = await buildAdmin({ teamId: team.id });
const { importId } = await runImport({
teamId: team.id,
createdById: admin.id,
zipPath: zip.filePath,
});
const documents = await Document.findAll({
where: { apiImportId: importId },
});
expect(documents.length).toBe(2);
for (const document of documents) {
expect(document.createdById).toBe(originalAuthor.id);
expect(document.lastModifiedById).toBe(originalAuthor.id);
}
});
it("falls back to importing user when no match is found", async () => {
// No fixture user is created here; the admin is the only user built.
const team = await buildTeam();
const admin = await buildAdmin({ teamId: team.id });
const { importId } = await runImport({
teamId: team.id,
createdById: admin.id,
zipPath: zip.filePath,
});
const documents = await Document.findAll({
where: { apiImportId: importId },
});
expect(documents.length).toBe(2);
for (const document of documents) {
expect(document.createdById).toBe(admin.id);
expect(document.lastModifiedById).toBe(admin.id);
}
});
it("does not match users from a different team", async () => {
const team = await buildTeam();
const otherTeam = await buildTeam();
// A user with the fixture email exists, but only in the other team.
await buildUser({
teamId: otherTeam.id,
email: FIXTURE_USER_EMAIL,
});
const admin = await buildAdmin({ teamId: team.id });
const { importId } = await runImport({
teamId: team.id,
createdById: admin.id,
zipPath: zip.filePath,
});
const documents = await Document.findAll({
where: { apiImportId: importId },
});
expect(documents.length).toBe(2);
// Only createdById is asserted here; lastModifiedById is not checked.
for (const document of documents) {
expect(document.createdById).toBe(admin.id);
}
});
});
});
// Unit tests for rewriteAttachmentReferences: each case passes a small
// ProseMirror JSON document plus an external-id → new-id map and checks the
// attributes of the returned document.
describe("rewriteAttachmentReferences", () => {
it("rewrites image src to new attachment id", () => {
const out = rewriteAttachmentReferences(
{
type: "doc",
content: [
{
type: "paragraph",
content: [
{
type: "image",
attrs: {
src: "/api/attachments.redirect?id=external-1",
alt: null,
},
},
],
},
],
},
{ "external-1": "new-1" }
);
// First child of the first paragraph is the image node.
const image = out.content?.[0].content?.[0];
expect(image?.attrs?.src).toBe("/api/attachments.redirect?id=new-1");
});
it("rewrites attachment node href and id together", () => {
const out = rewriteAttachmentReferences(
{
type: "doc",
content: [
{
type: "attachment",
attrs: {
id: "external-2",
href: "/api/attachments.redirect?id=external-2",
title: "a.pdf",
},
},
],
},
{ "external-2": "new-2" }
);
// Both the redirect URL and the node's own id must point at the new id.
const attachment = out.content?.[0];
expect(attachment?.attrs?.href).toBe("/api/attachments.redirect?id=new-2");
expect(attachment?.attrs?.id).toBe("new-2");
});
it("leaves unknown references untouched", () => {
const out = rewriteAttachmentReferences(
{
type: "doc",
content: [
{
type: "paragraph",
content: [
{
type: "image",
attrs: {
src: "/api/attachments.redirect?id=does-not-exist",
},
},
],
},
],
},
// The map has no entry for "does-not-exist".
{ "external-1": "new-1" }
);
const image = out.content?.[0].content?.[0];
expect(image?.attrs?.src).toBe(
"/api/attachments.redirect?id=does-not-exist"
);
});
});
+544
View File
@@ -0,0 +1,544 @@
import path from "node:path";
import { randomUUID } from "node:crypto";
import { Fragment, Node } from "prosemirror-model";
import { UniqueConstraintError } from "sequelize";
import type {
ImportTaskInput,
ImportTaskOutput,
JSONAttachmentManifestItem,
JSONPageImportTaskInputItem,
} from "@shared/schema";
import type {
IntegrationService,
ProsemirrorData,
ProsemirrorDoc,
} from "@shared/types";
import { AttachmentPreset } from "@shared/types";
import attachmentCreator from "@server/commands/attachmentCreator";
import { createContext } from "@server/context";
import { schema } from "@server/editor";
import env from "@server/env";
import Logger from "@server/logging/Logger";
import type { ImportTask } from "@server/models";
import { Attachment } from "@server/models";
import AttachmentHelper from "@server/models/helpers/AttachmentHelper";
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
import { sequelize } from "@server/storage/database";
import FileStorage from "@server/storage/files";
import type {
AttachmentJSONExport,
CollectionJSONExport,
DocumentJSONExport,
JSONExportMetadata,
} from "@server/types";
import ZipHelper from "@server/utils/ZipHelper";
import type { ProcessOutput } from "./APIImportTask";
import APIImportTask from "./APIImportTask";
// The integration service this task is specialised for; used as the type
// argument for the generic import types below.
type Service = IntegrationService.JSON;
// Matches `/api/attachments.redirect?id=<id>` and captures the id, which runs
// until the next `&`, quote, whitespace or `)`. Global so String.replace
// rewrites every occurrence in a string.
const REDIRECT_URL_REGEX = /\/api\/attachments\.redirect\?id=([^&"'\s)]+)/g;
// Node type names that rewriteAttachmentReferences rewrites instead of
// descending into.
const ATTACHMENT_NODE_TYPES = ["attachment", "image", "video"];
/**
 * A document found in a collection export, together with its position in the
 * document tree.
 */
interface DiscoveredDocument {
// Id of the document as it appears in the export.
externalId: string;
// Export id of the parent document, if any.
parentExternalId?: string;
// Export id of the collection the document belongs to.
collectionExternalId: string;
// The document exactly as parsed from the export.
export: DocumentJSONExport;
// Direct child documents, each carrying its own descendants.
children: DiscoveredDocument[];
}
/**
 * Repoints every `/api/attachments.redirect?id=<externalId>` URL found on
 * attachment, image and video nodes of a ProseMirror document at the id the
 * attachment was given during import.
 *
 * Attachment nodes have their `href` rewritten and their `id` attribute
 * replaced (looked up by the node's own id first, then by the id embedded in
 * the original href). Image and video nodes have their `src` rewritten. Ids
 * that are missing from the map are left exactly as they were, so a malformed
 * export cannot crash the importer.
 *
 * Exported for tests; not part of the module's public surface.
 *
 * @param content ProseMirror content from a document or collection.
 * @param attachmentIdMap Map of external attachment id → new internal id.
 * @returns ProseMirror content with rewritten attachment references.
 */
export function rewriteAttachmentReferences(
  content: ProsemirrorData,
  attachmentIdMap: Record<string, string>
): ProsemirrorData {
  // Swap each known external id inside a redirect URL for its new id.
  const remapUrl = (url?: string): string | undefined =>
    url
      ? url.replace(REDIRECT_URL_REGEX, (matched, externalId: string) => {
          const mappedId = attachmentIdMap[externalId];
          return mappedId ? Attachment.getRedirectUrl(mappedId) : matched;
        })
      : url;

  // Rebuild a single attachment / image / video node with remapped attrs.
  const rebuildNode = (node: Node): Node => {
    const serialized = node.toJSON() as ProsemirrorData;
    const nextAttrs = { ...(serialized.attrs ?? {}) };

    switch (node.type.name) {
      case "attachment": {
        const originalHref = nextAttrs.href as string | undefined;
        nextAttrs.href = remapUrl(originalHref);

        // Keep `id` in step with the rewritten URL so consumers that resolve
        // the attachment by id find the newly created row.
        const mappedById =
          typeof nextAttrs.id === "string"
            ? attachmentIdMap[nextAttrs.id]
            : undefined;
        if (mappedById) {
          nextAttrs.id = mappedById;
        } else if (typeof originalHref === "string") {
          const embeddedId = /\/api\/attachments\.redirect\?id=([^&"'\s)]+)/.exec(
            originalHref
          )?.[1];
          const mappedByHref = embeddedId
            ? attachmentIdMap[embeddedId]
            : undefined;
          if (mappedByHref) {
            nextAttrs.id = mappedByHref;
          }
        }
        break;
      }
      case "image":
      case "video":
        nextAttrs.src = remapUrl(nextAttrs.src as string | undefined);
        break;
    }

    serialized.attrs = nextAttrs;
    return Node.fromJSON(schema, serialized);
  };

  // Rebuild a fragment, rewriting media nodes and recursing into the rest.
  const rebuildFragment = (fragment: Fragment): Fragment => {
    const rebuilt: Node[] = [];
    for (let index = 0; index < fragment.childCount; index++) {
      const child = fragment.child(index);
      if (ATTACHMENT_NODE_TYPES.includes(child.type.name)) {
        rebuilt.push(rebuildNode(child));
      } else {
        rebuilt.push(child.copy(rebuildFragment(child.content)));
      }
    }
    return Fragment.fromArray(rebuilt);
  };

  const root = Node.fromJSON(schema, content);
  const rewritten = root.copy(rebuildFragment(root.content));
  return rewritten.toJSON() as ProsemirrorData;
}
export default class JSONAPIImportTask extends APIImportTask<Service> {
/**
 * Always returns false for the JSON importer.
 *
 * NOTE(review): presumably this tells the base APIImportTask not to upload
 * attachments while each page is processed; this class creates them in
 * onAllTasksCompleted instead — confirm against APIImportTask.
 *
 * @returns false.
 */
protected shouldUploadAttachmentsPerPage(): boolean {
return false;
}
/**
 * Schedules a new JSONAPIImportTask run for the given import task.
 *
 * @param importTask The import task whose id is passed to the scheduled run.
 */
protected async scheduleNextTask(importTask: ImportTask<Service>) {
await new JSONAPIImportTask().schedule({ importTaskId: importTask.id });
}
/**
 * Completion hook: re-opens the uploaded zip and creates an Attachment for
 * every manifest entry whose file is present in the archive.
 *
 * Does nothing when the import has no `scratch.storageKey` or the manifest is
 * empty. Manifest entries whose file is not found in the zip are logged and
 * skipped. The temporary file handle is always cleaned up.
 *
 * @param lastImportTask The last import task; its `import` supplies the
 * scratch state and the user the attachments are created for.
 * @throws Any error from reading an entry or creating an attachment, except a
 * UniqueConstraintError, which is treated as "already created".
 */
protected async onAllTasksCompleted(
  lastImportTask: ImportTask<Service>
): Promise<void> {
  const scratch = lastImportTask.import.scratch;
  if (!scratch?.storageKey || !scratch.manifest?.length) {
    return;
  }

  // Canonical form used on BOTH sides of the lookup: segments rejoined with
  // `/`, with empty and `.` segments dropped. Manifest paths are stored as
  // they appeared in the export, so without normalizing them too a key such
  // as `./uploads/a.png` or `uploads//a.png` would never match its entry.
  const normalizePath = (value: string): string =>
    value
      .split("/")
      .filter((segment) => segment !== "" && segment !== ".")
      .join("/");

  const handle = await FileStorage.getFileHandle(scratch.storageKey);
  try {
    const createdBy = lastImportTask.import.createdBy;
    const manifestByPath = new Map<string, JSONAttachmentManifestItem>(
      scratch.manifest.map((item) => [normalizePath(item.pathInZip), item])
    );
    const maxAttachmentSize = AttachmentHelper.presetToMaxUploadSize(
      AttachmentPreset.DocumentAttachment
    );
    // Manifest paths (as stored) that were found in the archive.
    const seen = new Set<string>();

    await ZipHelper.walk(handle.path, async (entry) => {
      if (entry.isDirectory) {
        return;
      }
      const item = manifestByPath.get(normalizePath(entry.fileName));
      if (!item) {
        return;
      }
      seen.add(item.pathInZip);
      const buffer = await entry.readBuffer(maxAttachmentSize);
      try {
        await sequelize.transaction(async (transaction) =>
          attachmentCreator({
            source: "import",
            preset: AttachmentPreset.DocumentAttachment,
            id: item.id,
            name: item.name,
            type: item.mimeType,
            buffer,
            user: createdBy,
            ctx: createContext({ user: createdBy, transaction }),
            fetchOptions: {
              timeout: env.FILE_STORAGE_IMPORT_TIMEOUT,
            },
          })
        );
      } catch (err) {
        // Each attachment commits in its own transaction, so a retry of
        // this hook can re-encounter ids that already landed. Treat the
        // unique-id collision as a no-op so the import remains resumable.
        if (err instanceof UniqueConstraintError) {
          return;
        }
        throw err;
      }
    });

    for (const item of scratch.manifest) {
      if (!seen.has(item.pathInZip)) {
        Logger.warn(
          `JSON import attachment missing in zip, skipping: ${item.pathInZip}`
        );
      }
    }
  } finally {
    // Best-effort cleanup: a failure here must not mask the import result.
    await handle.cleanup().catch(() => {});
  }
}
/**
 * Bootstrap phase: opens the uploaded zip referenced by
 * `scratch.storageKey`, parses its `.json` files, records every exported
 * attachment in a manifest, and persists the discovered collections and the
 * manifest on the Import.
 *
 * @param importTask The bootstrap import task; its `input` is replaced with
 * the placeholder item followed by one item per discovered collection.
 * @returns One task output per collection (with attachment references
 * already rewritten) and the first wave of document task inputs.
 * @throws Error when `scratch.storageKey` is missing, when a `.json` file
 * cannot be parsed, or when the zip's file tree has no children.
 */
protected async processBootstrap(
importTask: ImportTask<Service>
): Promise<ProcessOutput<Service>> {
const storageKey = importTask.import.scratch?.storageKey;
if (!storageKey) {
throw new Error("JSON import is missing scratch.storageKey");
}
const handle = await FileStorage.getFileHandle(storageKey);
try {
// Pre-load every JSON file at the top level of the zip during the walk.
// ZipHelper streams the archive end-to-end; capturing here means we can
// pair tree nodes with their parsed content without re-opening the zip.
const jsonByPath = new Map<string, unknown>();
const maxJSONSize = AttachmentHelper.presetToMaxUploadSize(
AttachmentPreset.WorkspaceImport
);
const tree = await ZipHelper.toFileTree(
handle.path,
async (node, entry) => {
// Only files with a .json extension are read; everything else is ignored here.
if (path.extname(node.name).toLowerCase() !== ".json") {
return;
}
const buffer = await entry.readBuffer(maxJSONSize);
try {
jsonByPath.set(node.pathInZip, JSON.parse(buffer.toString("utf8")));
} catch (err) {
throw new Error(
`Could not parse ${node.name}. ${err instanceof Error ? err.message : "unknown error"}`
);
}
}
);
if (tree.children.length === 0) {
throw new Error("Could not find valid content in zip file");
}
// metadata.json is only logged; nothing below depends on it.
const metadata = jsonByPath.get("metadata.json") as
| JSONExportMetadata
| undefined;
Logger.debug("task", "Importing JSON metadata", { metadata });
const manifest: JSONAttachmentManifestItem[] = [];
// External attachment id → manifest entry id (the new Attachment.id).
const attachmentIdMap: Record<string, string> = {};
const collectionExports: {
externalId: string;
data: CollectionJSONExport;
}[] = [];
// Each remaining top-level .json file is treated as one collection export.
for (const node of tree.children) {
// Skip nodes that have children, and the metadata file.
if (node.children.length > 0 || node.name === "metadata.json") {
continue;
}
if (path.extname(node.name).toLowerCase() !== ".json") {
Logger.debug("task", `Unhandled file in zip: ${node.pathInZip}`, {
importTaskId: importTask.id,
});
continue;
}
const parsed = jsonByPath.get(node.pathInZip) as
| CollectionJSONExport
| undefined;
if (!parsed) {
continue;
}
// NOTE(review): the parsed JSON is cast, not validated — a top-level
// .json file without a `collection` key throws a TypeError here.
const collectionExternalId = parsed.collection.id;
collectionExports.push({
externalId: collectionExternalId,
data: parsed,
});
for (const attachment of Object.values(parsed.attachments ?? {})) {
this.registerAttachment(attachment, manifest, attachmentIdMap);
}
}
// Discover documents per collection, building the parent/child tree
// shape expected by the per-page cascade.
const collections = collectionExports.map((c) =>
this.buildCollection(c.externalId, c.data)
);
// Replace anything past the create-time placeholder with the freshly
// discovered collections so a retried bootstrap doesn't accumulate
// duplicate entries.
const associatedImport = importTask.import;
const placeholder = associatedImport.input[0];
associatedImport.input = [
placeholder,
...collections.map((c) => ({
externalId: c.externalId,
permission: placeholder.permission,
})),
];
// Persist the manifest alongside the storage key for the completion phase.
associatedImport.scratch = { storageKey, manifest };
await associatedImport.save();
// Collection placeholder items so ImportsProcessor iterates them
// during the bootstrap row (the earliest createdAt) — that guarantees
// collections land in the DB before any per-page document references
// them.
importTask.input = [
importTask.input[0],
...collections.map<JSONPageImportTaskInputItem>((c) => ({
externalId: c.externalId,
title: c.export.name,
urlId: c.export.urlId,
icon: c.export.icon,
color: c.export.color,
data: c.export.data ?? ProsemirrorHelper.getEmptyDocument(),
attachmentIdMap,
})),
];
// Outputs mirror the collection inputs, with content already rewritten.
const collectionOutputs: ImportTaskOutput = collections.map((c) => ({
externalId: c.externalId,
title: c.export.name,
urlId: c.export.urlId,
icon: c.export.icon,
color: c.export.color,
content: rewriteAttachmentReferences(
c.export.data ?? ProsemirrorHelper.getEmptyDocument(),
attachmentIdMap
) as ProsemirrorDoc,
}));
// First wave of document tasks: only top-level docs in each collection.
// Each carries its descendants in `children` and the per-page handler
// re-emits them as the next wave of childTasksInput, producing a strict
// depth-ordered cascade of ImportTask rows so parent FKs are always
// satisfied at child-doc creation time.
const childTasksInput: ImportTaskInput<Service> = collections.flatMap(
(c) => c.children.map((d) => this.toPageInput(d, attachmentIdMap))
);
return { taskOutput: collectionOutputs, childTasksInput };
} finally {
// Cleanup errors are deliberately ignored.
await handle.cleanup().catch(() => {});
}
}
/**
 * Page phase: turns each input item into a task output with its attachment
 * references rewritten, and collects the items' direct children as the input
 * for the next wave of tasks.
 *
 * @param importTask The import task whose `input` holds the page items.
 * @returns The per-page outputs and the next wave of child task inputs.
 */
protected async processPage(
  importTask: ImportTask<Service>
): Promise<ProcessOutput<Service>> {
  const pages = importTask.input as JSONPageImportTaskInputItem[];

  // Timestamps arrive as strings; absent values stay undefined.
  const toOptionalDate = (value?: string): Date | undefined =>
    value ? new Date(value) : undefined;

  const taskOutput: ImportTaskOutput = pages.map((page) => ({
    externalId: page.externalId,
    title: page.title,
    urlId: page.urlId,
    icon: page.icon,
    color: page.color,
    author: page.createdByName,
    createdById: page.createdById,
    createdByEmail: page.createdByEmail,
    createdAt: toOptionalDate(page.createdAt),
    updatedAt: toOptionalDate(page.updatedAt),
    // Unlike the other timestamps, a missing publishedAt becomes null.
    publishedAt: page.publishedAt ? new Date(page.publishedAt) : null,
    content: rewriteAttachmentReferences(
      page.data,
      page.attachmentIdMap
    ) as ProsemirrorDoc,
  }));

  // Only direct children are emitted; each carries its own descendants.
  const childTasksInput: JSONPageImportTaskInputItem[] = pages.flatMap(
    (page) => page.children ?? []
  );

  return { taskOutput, childTasksInput };
}
/**
 * Arranges the documents of a parsed CollectionJSONExport into a tree of
 * `DiscoveredDocument`s, each holding its direct descendants in `children`.
 *
 * When the export has a non-empty `documentStructure` the tree follows it
 * (keeping the authored sibling order) and entries without a matching
 * document are skipped. Otherwise the tree is derived from each document's
 * `parentDocumentId`, starting from documents that have no parent.
 *
 * @param externalId The collection's external id.
 * @param data Parsed CollectionJSONExport.
 * @returns A collection record with a tree of `DiscoveredDocument`s.
 */
private buildCollection(
  externalId: string,
  data: CollectionJSONExport
): {
  externalId: string;
  export: CollectionJSONExport["collection"];
  children: DiscoveredDocument[];
} {
  type NavNode = { id: string; children?: NavNode[] };

  const documentsById: Record<string, DocumentJSONExport> =
    data.documents ?? {};

  // Wrap an exported document; an explicit parent wins over the exported one.
  const discover = (
    exported: DocumentJSONExport,
    parentId?: string
  ): DiscoveredDocument => ({
    externalId: exported.id,
    parentExternalId: parentId ?? exported.parentDocumentId ?? undefined,
    collectionExternalId: externalId,
    export: exported,
    children: [],
  });

  // Tree built from the authored navigation structure.
  const fromStructure = (
    navNodes: NavNode[],
    parentId: string | undefined
  ): DiscoveredDocument[] =>
    navNodes.flatMap<DiscoveredDocument>((nav) => {
      const exported = documentsById[nav.id];
      if (!exported) {
        return [];
      }
      const discovered = discover(exported, parentId);
      if (nav.children?.length) {
        discovered.children = fromStructure(nav.children, exported.id);
      }
      return [discovered];
    });

  // Tree built from parent/child links when no structure is available.
  const fromParentLinks = (): DiscoveredDocument[] => {
    const siblingsByParent = new Map<
      string | undefined,
      DocumentJSONExport[]
    >();
    for (const exported of Object.values(documentsById)) {
      const parentId = exported.parentDocumentId ?? undefined;
      const siblings = siblingsByParent.get(parentId);
      if (siblings) {
        siblings.push(exported);
      } else {
        siblingsByParent.set(parentId, [exported]);
      }
    }

    const descend = (parentId: string | undefined): DiscoveredDocument[] =>
      (siblingsByParent.get(parentId) ?? []).map((exported) => {
        const discovered = discover(exported, parentId);
        discovered.children = descend(exported.id);
        return discovered;
      });

    return descend(undefined);
  };

  const structure = data.collection.documentStructure;
  const roots = structure?.length
    ? fromStructure(structure, undefined)
    : fromParentLinks();

  return {
    externalId,
    export: data.collection,
    children: roots,
  };
}
/**
 * Records an attachment in the manifest and the external→new id map. Skips
 * duplicates so collections that share an attachment id (unlikely in a
 * valid export, but possible) only land once.
 *
 * The attachment's `key` is stored verbatim as the manifest's `pathInZip`.
 * Keys containing a `..` path segment are rejected; file names that merely
 * contain consecutive dots (e.g. `report..final.pdf`) are accepted.
 *
 * @param attachment The attachment as it appears in the export.
 * @param manifest Manifest array to push entries into.
 * @param attachmentIdMap External id → new internal id map.
 * @throws Error when the attachment key contains a `..` path segment.
 */
private registerAttachment(
  attachment: AttachmentJSONExport,
  manifest: JSONAttachmentManifestItem[],
  attachmentIdMap: Record<string, string>
): void {
  if (attachmentIdMap[attachment.id]) {
    return;
  }

  // Check whole path segments (either separator) rather than substrings, so
  // only genuine parent-directory references are treated as traversal.
  const hasTraversalSegment = attachment.key
    .split(/[\\/]/)
    .some((segment) => segment === "..");
  if (hasTraversalSegment) {
    throw new Error(`Invalid attachment path: ${attachment.key}`);
  }

  const id = randomUUID();
  attachmentIdMap[attachment.id] = id;
  manifest.push({
    id,
    externalId: attachment.id,
    name: attachment.name,
    // Fall back to a generic binary type when the export has none.
    mimeType: attachment.contentType || "application/octet-stream",
    pathInZip: attachment.key,
  });
}
/**
 * Builds the per-page task input for a discovered document. The document's
 * descendants are converted recursively and nested under `children`, so that
 * every level of the tree can be processed as its own wave of tasks.
 *
 * @param doc The discovered document, including its descendants.
 * @param attachmentIdMap External attachment id → new internal id map.
 * @returns A self-contained per-page task input.
 */
private toPageInput(
  doc: DiscoveredDocument,
  attachmentIdMap: Record<string, string>
): JSONPageImportTaskInputItem {
  const { export: exported, children: descendants } = doc;

  // Leaf documents carry `undefined` rather than an empty array.
  const children =
    descendants.length > 0
      ? descendants.map((child) => this.toPageInput(child, attachmentIdMap))
      : undefined;

  return {
    externalId: doc.externalId,
    parentExternalId: doc.parentExternalId,
    collectionExternalId: doc.collectionExternalId,
    title: exported.title,
    urlId: exported.urlId,
    // Use `emoji` when the export has no `icon`.
    icon: exported.icon ?? exported.emoji,
    color: exported.color,
    data: exported.data,
    createdById: exported.createdById,
    createdByName: exported.createdByName,
    createdByEmail: exported.createdByEmail,
    createdAt: exported.createdAt,
    updatedAt: exported.updatedAt,
    publishedAt: exported.publishedAt,
    attachmentIdMap,
    children,
  };
}
}
+27 -13
View File
@@ -1,13 +1,17 @@
import Router from "koa-router"; import Router from "koa-router";
import { randomUUID } from "node:crypto";
import { truncate } from "es-toolkit/compat";
import type { WhereOptions } from "sequelize"; import type { WhereOptions } from "sequelize";
import { Sequelize, Op } from "sequelize"; import { Sequelize, Op } from "sequelize";
import { import {
CollectionPermission, CollectionPermission,
CollectionStatusFilter, CollectionStatusFilter,
FileOperationState, FileOperationFormat,
FileOperationType, ImportState,
IntegrationService,
UserRole, UserRole,
} from "@shared/types"; } from "@shared/types";
import { ImportValidation } from "@shared/validations";
import collectionExporter from "@server/commands/collectionExporter"; import collectionExporter from "@server/commands/collectionExporter";
import teamUpdater from "@server/commands/teamUpdater"; import teamUpdater from "@server/commands/teamUpdater";
import auth from "@server/middlewares/authentication"; import auth from "@server/middlewares/authentication";
@@ -22,8 +26,8 @@ import {
User, User,
Group, Group,
Attachment, Attachment,
FileOperation,
Document, Document,
Import,
} from "@server/models"; } from "@server/models";
import { authorize } from "@server/policies"; import { authorize } from "@server/policies";
import { import {
@@ -161,17 +165,27 @@ router.post(
}); });
authorize(user, "read", attachment); authorize(user, "read", attachment);
await FileOperation.createWithCtx(ctx, { const service =
type: FileOperationType.Import, format === FileOperationFormat.MarkdownZip
state: FileOperationState.Creating, ? IntegrationService.Markdown
format, : IntegrationService.JSON;
size: attachment.size,
key: attachment.key, await Import.createWithCtx(ctx, {
userId: user.id, name: truncate(attachment.name, {
length: ImportValidation.maxNameLength,
}),
service,
state: ImportState.Created,
input: [
{
externalId: randomUUID(),
permission: permission ?? undefined,
},
],
scratch: { storageKey: attachment.key },
integrationId: null,
createdById: user.id,
teamId: user.teamId, teamId: user.teamId,
options: {
permission,
},
}); });
ctx.body = { ctx.body = {
+7 -3
View File
@@ -76,10 +76,14 @@ export const CollectionsImportSchema = BaseSchema.extend({
.nullish() .nullish()
.transform((val) => (isUndefined(val) ? null : val)), .transform((val) => (isUndefined(val) ? null : val)),
attachmentId: z.uuid(), attachmentId: z.uuid(),
// Markdown zip imports now run through `imports.create` → /**
// MarkdownAPIImportTask, so only JSON is accepted here. * The format of the upload. Both `json` and `outline-markdown` are
* routed through the API-import pipeline (see `imports.create`); the
* `format` field is retained for backwards compatibility with API
* clients calling this endpoint directly.
*/
format: z format: z
.literal(FileOperationFormat.JSON) .enum([FileOperationFormat.JSON, FileOperationFormat.MarkdownZip])
.prefault(FileOperationFormat.JSON), .prefault(FileOperationFormat.JSON),
}), }),
}); });
@@ -59,6 +59,12 @@ describe("#imports.create", () => {
const integration = await buildIntegration({ const integration = await buildIntegration({
userId: admin.id, userId: admin.id,
teamId: admin.teamId, teamId: admin.teamId,
service: IntegrationService.Notion,
type: IntegrationType.Import,
settings: {
externalWorkspace: { id: "ws-1", name: "Test Workspace" },
// oxlint-disable-next-line @typescript-eslint/no-explicit-any
} as any,
}); });
const input: NotionImportInput = [ const input: NotionImportInput = [
{ permission: CollectionPermission.Read }, { permission: CollectionPermission.Read },
+5 -18
View File
@@ -5,7 +5,6 @@ import type { WhereOptions } from "sequelize";
import type { IntegrationType } from "@shared/types"; import type { IntegrationType } from "@shared/types";
import { ImportState, IntegrationService, UserRole } from "@shared/types"; import { ImportState, IntegrationService, UserRole } from "@shared/types";
import { ImportValidation } from "@shared/validations"; import { ImportValidation } from "@shared/validations";
import { UnprocessableEntityError } from "@server/errors";
import auth from "@server/middlewares/authentication"; import auth from "@server/middlewares/authentication";
import { rateLimiter } from "@server/middlewares/rateLimiter"; import { rateLimiter } from "@server/middlewares/rateLimiter";
import { transaction } from "@server/middlewares/transaction"; import { transaction } from "@server/middlewares/transaction";
@@ -33,22 +32,10 @@ router.post(
authorize(user, "createImport", user.team); authorize(user, "createImport", user.team);
const importInProgress = await Import.count({ if (
where: { body.service === IntegrationService.Markdown ||
state: [ body.service === IntegrationService.JSON
ImportState.Created, ) {
ImportState.InProgress,
ImportState.Processed,
],
teamId: user.teamId,
},
});
if (importInProgress) {
throw UnprocessableEntityError("An import is already in progress");
}
if (body.service === IntegrationService.Markdown) {
const attachment = await Attachment.findByPk(body.attachmentId, { const attachment = await Attachment.findByPk(body.attachmentId, {
rejectOnEmpty: true, rejectOnEmpty: true,
}); });
@@ -58,7 +45,7 @@ router.post(
name: truncate(attachment.name, { name: truncate(attachment.name, {
length: ImportValidation.maxNameLength, length: ImportValidation.maxNameLength,
}), }),
service: IntegrationService.Markdown, service: body.service,
state: ImportState.Created, state: ImportState.Created,
input: [ input: [
{ {
+5
View File
@@ -43,6 +43,11 @@ export const ImportsCreateSchema = BaseSchema.extend({
attachmentId: z.uuid(), attachmentId: z.uuid(),
permission: z.enum(CollectionPermission).optional(), permission: z.enum(CollectionPermission).optional(),
}), }),
z.object({
service: z.literal(IntegrationService.JSON),
attachmentId: z.uuid(),
permission: z.enum(CollectionPermission).optional(),
}),
]), ]),
}); });
+18 -11
View File
@@ -576,18 +576,25 @@ export async function buildImport(overrides: Partial<Import<any>> = {}) {
overrides.integrationId = integration.id; overrides.integrationId = integration.id;
} }
// Skip BeforeCreate hooks so tests can seed multiple imports per team. The
// production "one in-progress import per team" rule is enforced by the
// Import.checkInProgress hook; tests don't need to abide by it.
// oxlint-disable-next-line @typescript-eslint/no-explicit-any // oxlint-disable-next-line @typescript-eslint/no-explicit-any
return Import.create<Import<any>>({ return Import.create<Import<any>>(
name: "testImport", {
service: IntegrationService.Notion, name: "testImport",
state: ImportState.Created, service: IntegrationService.Notion,
input: [ state: ImportState.Created,
{ input: [
permission: CollectionPermission.Read, {
}, permission: CollectionPermission.Read,
], },
...overrides, ],
}); ...overrides,
// oxlint-disable-next-line @typescript-eslint/no-explicit-any
} as any,
{ hooks: false }
);
} }
export async function buildAttachment( export async function buildAttachment(
Binary file not shown.
+100 -5
View File
@@ -1,5 +1,9 @@
import { z } from "zod"; import { z } from "zod";
import type { IntegrationService, ProsemirrorDoc } from "./types"; import type {
IntegrationService,
ProsemirrorData,
ProsemirrorDoc,
} from "./types";
import { import {
CollectionPermission, CollectionPermission,
type ImportableIntegrationService, type ImportableIntegrationService,
@@ -28,12 +32,20 @@ export type MarkdownImportInput = z.infer<
typeof MarkdownImportInputItemSchema typeof MarkdownImportInputItemSchema
>[]; >[];
export const JSONImportInputItemSchema = BaseImportInputItemSchema.extend({
externalId: z.string(),
});
export type JSONImportInput = z.infer<typeof JSONImportInputItemSchema>[];
export type ImportInput<T extends ImportableIntegrationService> = export type ImportInput<T extends ImportableIntegrationService> =
T extends IntegrationService.Notion T extends IntegrationService.Notion
? NotionImportInput ? NotionImportInput
: T extends IntegrationService.Markdown : T extends IntegrationService.Markdown
? MarkdownImportInput ? MarkdownImportInput
: BaseImportInput; : T extends IntegrationService.JSON
? JSONImportInput
: BaseImportInput;
export const BaseImportTaskInputItemSchema = z.object({ export const BaseImportTaskInputItemSchema = z.object({
externalId: z.string(), externalId: z.string(),
@@ -82,6 +94,36 @@ export interface MarkdownImportScratch {
manifest?: MarkdownAttachmentManifestItem[]; manifest?: MarkdownAttachmentManifestItem[];
} }
/**
* Manifest entry describing a single attachment discovered during the JSON
* zip bootstrap phase. `externalId` is the attachment's original id from the
* export — used to rewrite `/api/attachments.redirect?id=<externalId>`
* references in document/collection content into new redirect URLs that point
* at the freshly created Attachment row (`id`).
*/
export const JSONAttachmentManifestItemSchema = z.object({
id: z.uuid(),
externalId: z.string(),
name: z.string(),
mimeType: z.string(),
pathInZip: z.string(),
});
export type JSONAttachmentManifestItem = z.infer<
typeof JSONAttachmentManifestItemSchema
>;
/**
* JSON importer scratch state. `storageKey` is set at import creation (it's
* the only durable handle on the uploaded zip). `manifest` is added by the
* bootstrap phase so the completion phase can re-download the zip and create
* Attachment rows without re-parsing the JSON files.
*/
export interface JSONImportScratch {
storageKey: string;
manifest?: JSONAttachmentManifestItem[];
}
/** /**
* Per-importer scratch shape stored on `Import.scratch`. Holds cross-phase * Per-importer scratch shape stored on `Import.scratch`. Holds cross-phase
* state that the importer needs between bootstrap and completion but that * state that the importer needs between bootstrap and completion but that
@@ -89,7 +131,11 @@ export interface MarkdownImportScratch {
* `Processed`. * `Processed`.
*/ */
export type ImportScratch<T extends ImportableIntegrationService> = export type ImportScratch<T extends ImportableIntegrationService> =
T extends IntegrationService.Markdown ? MarkdownImportScratch : never; T extends IntegrationService.Markdown
? MarkdownImportScratch
: T extends IntegrationService.JSON
? JSONImportScratch
: never;
/** /**
* Per-page task input. Generated by the bootstrap task and consumed by * Per-page task input. Generated by the bootstrap task and consumed by
@@ -124,22 +170,71 @@ export type MarkdownImportTaskInput = (
| MarkdownPageImportTaskInputItem | MarkdownPageImportTaskInputItem
)[]; )[];
/**
* Per-page task input for the JSON importer. Generated by the bootstrap task
* once the zip has been parsed; consumed by subsequent JSONAPIImportTask runs.
* `children` carries this document's direct descendants so each tree-depth
* runs as its own task wave, preserving parent-before-child ordering during
* persistence (createdAt of child tasks is strictly later than parents'). The
* type is defined as a TypeScript interface rather than via z.infer because
* it is only consumed internally — never validated at an API boundary — and
* zod's recursive-schema ergonomics aren't worth the cost here.
*/
export interface JSONPageImportTaskInputItem {
externalId: string;
parentExternalId?: string;
collectionExternalId?: string;
title: string;
urlId?: string;
icon?: string | null;
color?: string | null;
data: ProsemirrorData;
createdById?: string;
createdByName?: string;
createdByEmail?: string | null;
createdAt?: string;
updatedAt?: string;
publishedAt?: string | null;
/** Map of external attachment id → manifest entry id, scoped to this doc. */
attachmentIdMap: Record<string, string>;
children?: JSONPageImportTaskInputItem[];
}
/**
* JSON import task input — a bootstrap row carrying only the base placeholder
* item (the zip's `storageKey` lives on `Import.scratch`), or a page row
* carrying per-document content.
*/
export type JSONImportTaskInput = (
| BaseImportTaskInput[number]
| JSONPageImportTaskInputItem
)[];
/**
 * Resolves the task input type for an importable integration service.
 *
 * Notion, Markdown and JSON each map to their dedicated input type; any other
 * service falls back to `BaseImportTaskInput`.
 */
export type ImportTaskInput<T extends ImportableIntegrationService> =
  T extends IntegrationService.Notion
    ? NotionImportTaskInput
    : T extends IntegrationService.Markdown
      ? MarkdownImportTaskInput
      : T extends IntegrationService.JSON
        ? JSONImportTaskInput
        : BaseImportTaskInput;
// No reason to be here except for co-location with import task input.
/**
 * Output of an import task: a list of entries, each identified by its
 * `externalId` and carrying a title and Prosemirror `content`, plus optional
 * presentation, authorship and timestamp fields.
 */
export type ImportTaskOutput = {
  externalId: string;
  title: string;
  icon?: string | null;
  color?: string | null;
  urlId?: string;
  author?: string;
  /** Original author's id in the source system, used for user remapping. */
  createdById?: string;
  /** Original author's email in the source system, used for user remapping. */
  createdByEmail?: string | null;
  content: ProsemirrorDoc;
  createdAt?: Date;
  updatedAt?: Date;
  publishedAt?: Date | null;
}[];
export const IssueSource = z.object({ export const IssueSource = z.object({
+5 -1
View File
@@ -167,16 +167,20 @@ export enum IntegrationService {
Figma = "figma", Figma = "figma",
Notion = "notion", Notion = "notion",
Markdown = "markdown", Markdown = "markdown",
JSON = "json",
} }
/**
 * The subset of `IntegrationService` members treated as importable:
 * Notion, Markdown and JSON.
 */
export type ImportableIntegrationService = Extract<
  IntegrationService,
  | IntegrationService.Notion
  | IntegrationService.Markdown
  | IntegrationService.JSON
>;
/**
 * Value-level companion to the `ImportableIntegrationService` type, exposing
 * the Notion, Markdown and JSON members of `IntegrationService` under the same
 * names. Declared `as const` so each property keeps its literal enum-member type.
 */
export const ImportableIntegrationService = {
  Notion: IntegrationService.Notion,
  Markdown: IntegrationService.Markdown,
  JSON: IntegrationService.JSON,
} as const;
export type IssueTrackerIntegrationService = Extract< export type IssueTrackerIntegrationService = Extract<