From f3da1bc79e193f65dce4718ec52811041197b623 Mon Sep 17 00:00:00 2001 From: Tom Moor Date: Tue, 26 May 2026 07:06:56 -0400 Subject: [PATCH] fix: Remapping internal links on import (#12461) * fix: No remapping of internal links closes #9584 * PR feedback, testing * tsc --- server/models/Collection.test.ts | 16 ++ server/models/Collection.ts | 24 ++- server/queues/processors/ImportsProcessor.ts | 141 ++++++++++++++---- server/queues/tasks/JSONAPIImportTask.test.ts | 100 +++++++++++++ 4 files changed, 250 insertions(+), 31 deletions(-) diff --git a/server/models/Collection.test.ts b/server/models/Collection.test.ts index 50c971901c..12f192c42e 100644 --- a/server/models/Collection.test.ts +++ b/server/models/Collection.test.ts @@ -23,6 +23,22 @@ describe("#url", () => { }); expect(collection.path).toBe(`/collection/untitled-${collection.urlId}`); }); + + it("should return correct url with slugified collection name", () => { + const path = Collection.getPath({ + name: "Test Collection", + urlId: "abcdefghij", + }); + expect(path).toBe("/collection/test-collection-abcdefghij"); + }); + + it("should return untitled when collection name is empty", () => { + const path = Collection.getPath({ + name: "", + urlId: "abcdefghij", + }); + expect(path).toBe("/collection/untitled-abcdefghij"); + }); }); describe("getDocumentParents", () => { diff --git a/server/models/Collection.ts b/server/models/Collection.ts index b06715929b..64d121a081 100644 --- a/server/models/Collection.ts +++ b/server/models/Collection.ts @@ -322,10 +322,28 @@ class Collection extends ParanoidModel< /** The frontend path to this collection. */ get path(): string { - if (!this.name) { - return `/collection/untitled-${this.urlId}`; + return Collection.getPath({ + name: this.name, + urlId: this.urlId, + }); + } + + /** + * Returns the frontend path for a collection. + * + * @param name The collection name. + * @param urlId The collection URL ID. + * @returns the frontend path for the collection. + */ + static getPath({ name, urlId }: { name: string; urlId: string }): string { + if (!name) { + return `/collection/untitled-${urlId}`; } - return `/collection/${slugify(this.name)}-${this.urlId}`; + const slugifiedName = slugify(name); + if (!slugifiedName) { + return `/collection/untitled-${urlId}`; + } + return `/collection/${slugifiedName}-${urlId}`; } /** diff --git a/server/queues/processors/ImportsProcessor.ts b/server/queues/processors/ImportsProcessor.ts index d5eec938f1..4d2c255299 100644 --- a/server/queues/processors/ImportsProcessor.ts +++ b/server/queues/processors/ImportsProcessor.ts @@ -24,6 +24,7 @@ import { MentionType, } from "@shared/types"; import { colorPalette } from "@shared/utils/collections"; +import { UrlHelper } from "@shared/utils/UrlHelper"; import { CollectionValidation } from "@shared/validations"; import { createContext } from "@server/context"; import { schema } from "@server/editor"; @@ -307,6 +308,42 @@ export default abstract class ImportsProcessor< let collectionIdx = firstCollection?.index ?? null; + // Pre-pass: allocate new urlIds for every collection and document in this + // import so internal link hrefs in document content can be rewritten to + // point at the new paths during persistence. The map is keyed by the old + // urlId from the export, which is what appears in `/doc/-` + // and `/collection/-` link hrefs. + const urlIdMap: Record = {}; + + await ImportTask.findAllInBatches>( + { + where: { importId: importModel.id }, + order: [ + ["createdAt", "ASC"], + ["id", "ASC"], + ], + batchLimit: 5, + transaction, + }, + async (importTasks) => { + for (const importTask of importTasks) { + for (const output of importTask.output ?? []) { + if (!output.urlId || urlIdMap[output.urlId]) { + continue; + } + const isCollection = !!importInput[output.externalId]; + const allocated = isCollection + ? await this.preserveCollectionUrlId(output.urlId, transaction) + : await this.preserveDocumentUrlId(output.urlId, transaction); + urlIdMap[output.urlId] = { + urlId: allocated, + title: output.title, + }; + } + } + } + ); + await ImportTask.findAllInBatches>( { where: { importId: importModel.id }, @@ -353,11 +390,12 @@ export default abstract class ImportsProcessor< transaction, }); - const transformedContent = await this.updateMentionsAndAttachments({ + const transformedContent = await this.rewriteReferences({ content: output.content, attachments, importInput, idMap, + urlIdMap, actorId: importModel.createdById, teamId: importModel.teamId, }); @@ -388,10 +426,9 @@ export default abstract class ImportsProcessor< createdByName: output.author, }; - const urlId = await this.preserveCollectionUrlId( - output.urlId, - transaction - ); + const urlId = output.urlId + ? urlIdMap[output.urlId]?.urlId + : undefined; const collection = Collection.build({ id: internalId, @@ -439,10 +476,9 @@ export default abstract class ImportsProcessor< const isRootDocument = !parentExternalId || !!importInput[parentExternalId]; - const urlId = await this.preserveDocumentUrlId( - output.urlId, - transaction - ); + const urlId = output.urlId + ? urlIdMap[output.urlId]?.urlId + : undefined; const defaults = { title: output.title, @@ -519,19 +555,25 @@ export default abstract class ImportsProcessor< } /** - * Transform the mentions and attachments in ProseMirrorDoc to their internal references. + * Rewrite the mentions, attachments, and internal document/collection link + * marks in a ProseMirrorDoc so they resolve against the imported models + * rather than the source export. * * @param content ProseMirrorDoc that represents collection (or) document content. * @param attachments Array of attachment models created for the import. * @param idMap Map of internalId to externalId. + * @param urlIdMap Map of old urlId to the newly allocated urlId and title, + * used to rewrite `/doc/-` and `/collection/-` + * link hrefs. * @param importInput Contains the root externalId and associated info which were used to create the import. * @param actorId ID of the user who created the import. * @returns Updated ProseMirrorDoc. */ - private async updateMentionsAndAttachments({ + private async rewriteReferences({ content, attachments, idMap, + urlIdMap, importInput, actorId, teamId, @@ -539,6 +581,7 @@ export default abstract class ImportsProcessor< content: ProsemirrorDoc; attachments: Attachment[]; idMap: Record; + urlIdMap: Record; // oxlint-disable-next-line @typescript-eslint/no-explicit-any importInput: Record[number]>; actorId: string; @@ -551,6 +594,7 @@ export default abstract class ImportsProcessor< const attachmentsMap = keyBy(attachments, "id"); const doc = ProsemirrorHelper.toProsemirror(content); + const linkMarkType = schema.marks.link; const transformMentionNode = async (node: Node): Promise => { const json = node.toJSON() as ProsemirrorData; @@ -581,6 +625,55 @@ export default abstract class ImportsProcessor< return Node.fromJSON(schema, json); }; + const rewriteInternalLinkHref = (href: string): string => { + const docMatch = /^\/doc\/([^/?#]+)(.*)$/.exec(href); + if (docMatch) { + const slugMatch = UrlHelper.SLUG_URL_REGEX.exec(docMatch[1]); + const mapped = slugMatch ? urlIdMap[slugMatch[1]] : undefined; + if (mapped) { + return ( + Document.getPath({ title: mapped.title, urlId: mapped.urlId }) + + docMatch[2] + ); + } + } + const collectionMatch = /^\/collection\/([^/?#]+)(.*)$/.exec(href); + if (collectionMatch) { + const slugMatch = UrlHelper.SLUG_URL_REGEX.exec(collectionMatch[1]); + const mapped = slugMatch ? urlIdMap[slugMatch[1]] : undefined; + if (mapped) { + return ( + Collection.getPath({ name: mapped.title, urlId: mapped.urlId }) + + collectionMatch[2] + ); + } + } + return href; + }; + + const transformLinkMarks = (node: Node): Node => { + if (!node.marks.length) { + return node; + } + let changed = false; + const newMarks = node.marks.map((mark) => { + if (mark.type !== linkMarkType) { + return mark; + } + const href = mark.attrs.href as string | undefined; + if (!href) { + return mark; + } + const newHref = rewriteInternalLinkHref(href); + if (newHref === href) { + return mark; + } + changed = true; + return linkMarkType.create({ ...mark.attrs, href: newHref }); + }); + return changed ? node.mark(newMarks) : node; + }; + const transformFragment = async (fragment: Fragment): Promise => { const nodePromises: Promise[] = []; @@ -592,7 +685,7 @@ export default abstract class ImportsProcessor< } else { nodePromises.push( transformFragment(node.content).then((transformedContent) => - node.copy(transformedContent) + transformLinkMarks(node.copy(transformedContent)) ) ); } @@ -703,20 +796,16 @@ export default abstract class ImportsProcessor< /** * Honors a urlId from a document export if it does not collide with an - * existing Document, otherwise generates a fresh one. Returns `undefined` - * when no urlId is supplied (so the model's default applies). + * existing Document, otherwise generates a fresh one. * * @param sourceUrlId The urlId requested by the importer. * @param transaction Active sequelize transaction. - * @returns A urlId to use, or undefined to fall through to the default. + * @returns A urlId to use. */ private async preserveDocumentUrlId( - sourceUrlId: string | undefined, + sourceUrlId: string, transaction: Transaction - ): Promise { - if (!sourceUrlId) { - return undefined; - } + ): Promise { const existing = await Document.unscoped().findOne({ attributes: ["id"], paranoid: false, @@ -728,20 +817,16 @@ export default abstract class ImportsProcessor< /** * Honors a urlId from a collection export if it does not collide with an - * existing Collection, otherwise generates a fresh one. Returns `undefined` - * when no urlId is supplied (so the model's default applies). + * existing Collection, otherwise generates a fresh one. * * @param sourceUrlId The urlId requested by the importer. * @param transaction Active sequelize transaction. - * @returns A urlId to use, or undefined to fall through to the default. + * @returns A urlId to use. */ private async preserveCollectionUrlId( - sourceUrlId: string | undefined, + sourceUrlId: string, transaction: Transaction - ): Promise { - if (!sourceUrlId) { - return undefined; - } + ): Promise { const existing = await Collection.unscoped().findOne({ attributes: ["id"], paranoid: false, diff --git a/server/queues/tasks/JSONAPIImportTask.test.ts b/server/queues/tasks/JSONAPIImportTask.test.ts index 507d9c62cf..b712f63e11 100644 --- a/server/queues/tasks/JSONAPIImportTask.test.ts +++ b/server/queues/tasks/JSONAPIImportTask.test.ts @@ -19,6 +19,7 @@ import { } from "@shared/types"; import { buildAdmin, + buildDocument, buildImport, buildTeam, buildUser, @@ -35,6 +36,8 @@ const FIXTURE_USER_EMAIL = "hmac.devo@gmail.com"; interface BuiltZip { filePath: string; + documentOneUrlId: string; + documentTwoUrlId: string; cleanup: () => Promise; } @@ -85,6 +88,42 @@ async function buildJSONExportZip(): Promise { type: "paragraph", content: [{ type: "text", text: "Some random text" }], }, + { + type: "paragraph", + content: [ + { + type: "text", + text: "see doc two", + marks: [ + { + type: "link", + attrs: { + href: `/doc/document-2-${documentTwoUrlId}`, + title: null, + }, + }, + ], + }, + ], + }, + { + type: "paragraph", + content: [ + { + type: "text", + text: "see collection", + marks: [ + { + type: "link", + attrs: { + href: `/collection/test-json-${collectionUrlId}`, + title: null, + }, + }, + ], + }, + ], + }, { type: "paragraph", content: [ @@ -168,6 +207,8 @@ async function buildJSONExportZip(): Promise { return { filePath, + documentOneUrlId, + documentTwoUrlId, cleanup: async () => { await fs.rm(filePath, { force: true }).catch(() => {}); }, @@ -290,6 +331,65 @@ describe("JSONAPIImportTask", () => { expect(attachments.length).toBeGreaterThanOrEqual(1); }); + it("rewrites internal document links to the new urlIds", async () => { + const admin = await buildAdmin(); + // Pre-create a document with the exported urlId so the importer is + // forced to allocate a fresh urlId for Document 2. Without a collision + // the original urlId would be preserved and link rewriting wouldn't be + // exercised end-to-end. + await buildDocument({ + teamId: admin.teamId, + userId: admin.id, + urlId: zip.documentTwoUrlId, + }); + + const { importId } = await runImport({ + teamId: admin.teamId, + createdById: admin.id, + zipPath: zip.filePath, + }); + + const documents = await Document.findAll({ + where: { apiImportId: importId }, + order: [["title", "ASC"]], + }); + expect(documents.length).toBe(2); + const docOne = documents.find((d) => d.title === "Document 1"); + const docTwo = documents.find((d) => d.title === "Document 2"); + expect(docOne).toBeDefined(); + expect(docTwo).toBeDefined(); + expect(docTwo!.urlId).not.toBe(zip.documentTwoUrlId); + + const linkParagraph = docOne!.content?.content?.[1]; + const linkText = linkParagraph?.content?.[0]; + const linkMark = linkText?.marks?.find((m) => m.type === "link"); + expect(linkMark?.attrs?.href).toBe(`/doc/document-2-${docTwo!.urlId}`); + expect(linkMark?.attrs?.href).not.toContain(zip.documentTwoUrlId); + }); + + it("rewrites internal collection links to slugged collection paths", async () => { + const admin = await buildAdmin(); + const { importId } = await runImport({ + teamId: admin.teamId, + createdById: admin.id, + zipPath: zip.filePath, + }); + + const collection = await Collection.findOne({ + where: { apiImportId: importId }, + rejectOnEmpty: true, + }); + const docOne = await Document.findOne({ + where: { apiImportId: importId, title: "Document 1" }, + rejectOnEmpty: true, + }); + + const linkParagraph = docOne.content?.content?.[2]; + const linkText = linkParagraph?.content?.[0]; + const linkMark = linkText?.marks?.find((m) => m.type === "link"); + expect(linkMark?.attrs?.href).toBe(collection.path); + }); + describe("user mapping", () => { it("maps createdById to an existing user by ID", async () => { let originalAuthor = await User.findByPk(FIXTURE_USER_ID);