fix: Remapping internal links on import (#12461)

* fix: No remapping of internal links

closes #9584

* PR feedback, testing

* tsc
This commit is contained in:
Tom Moor
2026-05-26 07:06:56 -04:00
committed by GitHub
parent 8c1be70198
commit f3da1bc79e
4 changed files with 250 additions and 31 deletions
+16
View File
@@ -23,6 +23,22 @@ describe("#url", () => {
});
expect(collection.path).toBe(`/collection/untitled-${collection.urlId}`);
});
it("should return correct url with slugified collection name", () => {
const path = Collection.getPath({
name: "Test Collection",
urlId: "abcdefghij",
});
expect(path).toBe("/collection/test-collection-abcdefghij");
});
it("should return untitled when collection name is empty", () => {
const path = Collection.getPath({
name: "",
urlId: "abcdefghij",
});
expect(path).toBe("/collection/untitled-abcdefghij");
});
});
describe("getDocumentParents", () => {
+21 -3
View File
@@ -322,10 +322,28 @@ class Collection extends ParanoidModel<
/** The frontend path to this collection. */
get path(): string {
if (!this.name) {
return `/collection/untitled-${this.urlId}`;
return Collection.getPath({
name: this.name,
urlId: this.urlId,
});
}
/**
* Returns the frontend path for a collection.
*
* @param name The collection name.
* @param urlId The collection URL ID.
* @returns the frontend path for the collection.
*/
static getPath({ name, urlId }: { name: string; urlId: string }): string {
if (!name) {
return `/collection/untitled-${urlId}`;
}
return `/collection/${slugify(this.name)}-${this.urlId}`;
const slugifiedName = slugify(name);
if (!slugifiedName) {
return `/collection/untitled-${urlId}`;
}
return `/collection/${slugifiedName}-${urlId}`;
}
/**
+113 -28
View File
@@ -24,6 +24,7 @@ import {
MentionType,
} from "@shared/types";
import { colorPalette } from "@shared/utils/collections";
import { UrlHelper } from "@shared/utils/UrlHelper";
import { CollectionValidation } from "@shared/validations";
import { createContext } from "@server/context";
import { schema } from "@server/editor";
@@ -307,6 +308,42 @@ export default abstract class ImportsProcessor<
let collectionIdx = firstCollection?.index ?? null;
// Pre-pass: allocate new urlIds for every collection and document in this
// import so internal link hrefs in document content can be rewritten to
// point at the new paths during persistence. The map is keyed by the old
// urlId from the export, which is what appears in `/doc/<slug>-<urlId>`
// and `/collection/<slug>-<urlId>` link hrefs.
const urlIdMap: Record<string, { urlId: string; title: string }> = {};
await ImportTask.findAllInBatches<ImportTask<T>>(
{
where: { importId: importModel.id },
order: [
["createdAt", "ASC"],
["id", "ASC"],
],
batchLimit: 5,
transaction,
},
async (importTasks) => {
for (const importTask of importTasks) {
for (const output of importTask.output ?? []) {
if (!output.urlId || urlIdMap[output.urlId]) {
continue;
}
const isCollection = !!importInput[output.externalId];
const allocated = isCollection
? await this.preserveCollectionUrlId(output.urlId, transaction)
: await this.preserveDocumentUrlId(output.urlId, transaction);
urlIdMap[output.urlId] = {
urlId: allocated,
title: output.title,
};
}
}
}
);
await ImportTask.findAllInBatches<ImportTask<T>>(
{
where: { importId: importModel.id },
@@ -353,11 +390,12 @@ export default abstract class ImportsProcessor<
transaction,
});
const transformedContent = await this.updateMentionsAndAttachments({
const transformedContent = await this.rewriteReferences({
content: output.content,
attachments,
importInput,
idMap,
urlIdMap,
actorId: importModel.createdById,
teamId: importModel.teamId,
});
@@ -388,10 +426,9 @@ export default abstract class ImportsProcessor<
createdByName: output.author,
};
const urlId = await this.preserveCollectionUrlId(
output.urlId,
transaction
);
const urlId = output.urlId
? urlIdMap[output.urlId]?.urlId
: undefined;
const collection = Collection.build({
id: internalId,
@@ -439,10 +476,9 @@ export default abstract class ImportsProcessor<
const isRootDocument =
!parentExternalId || !!importInput[parentExternalId];
const urlId = await this.preserveDocumentUrlId(
output.urlId,
transaction
);
const urlId = output.urlId
? urlIdMap[output.urlId]?.urlId
: undefined;
const defaults = {
title: output.title,
@@ -519,19 +555,25 @@ export default abstract class ImportsProcessor<
}
/**
* Transform the mentions and attachments in ProseMirrorDoc to their internal references.
* Rewrite the mentions, attachments, and internal document/collection link
* marks in a ProseMirrorDoc so they resolve against the imported models
* rather than the source export.
*
* @param content ProseMirrorDoc that represents collection (or) document content.
* @param attachments Array of attachment models created for the import.
* @param idMap Map of internalId to externalId.
* @param urlIdMap Map of old urlId to the newly allocated urlId and title,
* used to rewrite `/doc/<slug>-<urlId>` and `/collection/<slug>-<urlId>`
* link hrefs.
* @param importInput Contains the root externalId and associated info which were used to create the import.
* @param actorId ID of the user who created the import.
* @returns Updated ProseMirrorDoc.
*/
private async updateMentionsAndAttachments({
private async rewriteReferences({
content,
attachments,
idMap,
urlIdMap,
importInput,
actorId,
teamId,
@@ -539,6 +581,7 @@ export default abstract class ImportsProcessor<
content: ProsemirrorDoc;
attachments: Attachment[];
idMap: Record<string, string>;
urlIdMap: Record<string, { urlId: string; title: string }>;
// oxlint-disable-next-line @typescript-eslint/no-explicit-any
importInput: Record<string, ImportInput<any>[number]>;
actorId: string;
@@ -551,6 +594,7 @@ export default abstract class ImportsProcessor<
const attachmentsMap = keyBy(attachments, "id");
const doc = ProsemirrorHelper.toProsemirror(content);
const linkMarkType = schema.marks.link;
const transformMentionNode = async (node: Node): Promise<Node> => {
const json = node.toJSON() as ProsemirrorData;
@@ -581,6 +625,55 @@ export default abstract class ImportsProcessor<
return Node.fromJSON(schema, json);
};
const rewriteInternalLinkHref = (href: string): string => {
const docMatch = /^\/doc\/([^/?#]+)(.*)$/.exec(href);
if (docMatch) {
const slugMatch = UrlHelper.SLUG_URL_REGEX.exec(docMatch[1]);
const mapped = slugMatch ? urlIdMap[slugMatch[1]] : undefined;
if (mapped) {
return (
Document.getPath({ title: mapped.title, urlId: mapped.urlId }) +
docMatch[2]
);
}
}
const collectionMatch = /^\/collection\/([^/?#]+)(.*)$/.exec(href);
if (collectionMatch) {
const slugMatch = UrlHelper.SLUG_URL_REGEX.exec(collectionMatch[1]);
const mapped = slugMatch ? urlIdMap[slugMatch[1]] : undefined;
if (mapped) {
return (
Collection.getPath({ name: mapped.title, urlId: mapped.urlId }) +
collectionMatch[2]
);
}
}
return href;
};
const transformLinkMarks = (node: Node): Node => {
if (!node.marks.length) {
return node;
}
let changed = false;
const newMarks = node.marks.map((mark) => {
if (mark.type !== linkMarkType) {
return mark;
}
const href = mark.attrs.href as string | undefined;
if (!href) {
return mark;
}
const newHref = rewriteInternalLinkHref(href);
if (newHref === href) {
return mark;
}
changed = true;
return linkMarkType.create({ ...mark.attrs, href: newHref });
});
return changed ? node.mark(newMarks) : node;
};
const transformFragment = async (fragment: Fragment): Promise<Fragment> => {
const nodePromises: Promise<Node>[] = [];
@@ -592,7 +685,7 @@ export default abstract class ImportsProcessor<
} else {
nodePromises.push(
transformFragment(node.content).then((transformedContent) =>
node.copy(transformedContent)
transformLinkMarks(node.copy(transformedContent))
)
);
}
@@ -703,20 +796,16 @@ export default abstract class ImportsProcessor<
/**
* Honors a urlId from a document export if it does not collide with an
* existing Document, otherwise generates a fresh one. Returns `undefined`
* when no urlId is supplied (so the model's default applies).
* existing Document, otherwise generates a fresh one.
*
* @param sourceUrlId The urlId requested by the importer.
* @param transaction Active sequelize transaction.
* @returns A urlId to use, or undefined to fall through to the default.
* @returns A urlId to use.
*/
private async preserveDocumentUrlId(
sourceUrlId: string | undefined,
sourceUrlId: string,
transaction: Transaction
): Promise<string | undefined> {
if (!sourceUrlId) {
return undefined;
}
): Promise<string> {
const existing = await Document.unscoped().findOne({
attributes: ["id"],
paranoid: false,
@@ -728,20 +817,16 @@ export default abstract class ImportsProcessor<
/**
* Honors a urlId from a collection export if it does not collide with an
* existing Collection, otherwise generates a fresh one. Returns `undefined`
* when no urlId is supplied (so the model's default applies).
* existing Collection, otherwise generates a fresh one.
*
* @param sourceUrlId The urlId requested by the importer.
* @param transaction Active sequelize transaction.
* @returns A urlId to use, or undefined to fall through to the default.
* @returns A urlId to use.
*/
private async preserveCollectionUrlId(
sourceUrlId: string | undefined,
sourceUrlId: string,
transaction: Transaction
): Promise<string | undefined> {
if (!sourceUrlId) {
return undefined;
}
): Promise<string> {
const existing = await Collection.unscoped().findOne({
attributes: ["id"],
paranoid: false,
@@ -19,6 +19,7 @@ import {
} from "@shared/types";
import {
buildAdmin,
buildDocument,
buildImport,
buildTeam,
buildUser,
@@ -35,6 +36,8 @@ const FIXTURE_USER_EMAIL = "hmac.devo@gmail.com";
interface BuiltZip {
filePath: string;
documentOneUrlId: string;
documentTwoUrlId: string;
cleanup: () => Promise<void>;
}
@@ -85,6 +88,42 @@ async function buildJSONExportZip(): Promise<BuiltZip> {
type: "paragraph",
content: [{ type: "text", text: "Some random text" }],
},
{
type: "paragraph",
content: [
{
type: "text",
text: "see doc two",
marks: [
{
type: "link",
attrs: {
href: `/doc/document-2-${documentTwoUrlId}`,
title: null,
},
},
],
},
],
},
{
type: "paragraph",
content: [
{
type: "text",
text: "see collection",
marks: [
{
type: "link",
attrs: {
href: `/collection/test-json-${collectionUrlId}`,
title: null,
},
},
],
},
],
},
{
type: "paragraph",
content: [
@@ -168,6 +207,8 @@ async function buildJSONExportZip(): Promise<BuiltZip> {
return {
filePath,
documentOneUrlId,
documentTwoUrlId,
cleanup: async () => {
await fs.rm(filePath, { force: true }).catch(() => {});
},
@@ -290,6 +331,65 @@ describe("JSONAPIImportTask", () => {
expect(attachments.length).toBeGreaterThanOrEqual(1);
});
it("rewrites internal document links to the new urlIds", async () => {
const admin = await buildAdmin();
// Pre-create a document with the exported urlId so the importer is
// forced to allocate a fresh urlId for Document 2. Without a collision
// the original urlId would be preserved and link rewriting wouldn't be
// exercised end-to-end.
await buildDocument({
teamId: admin.teamId,
userId: admin.id,
urlId: zip.documentTwoUrlId,
});
const { importId } = await runImport({
teamId: admin.teamId,
createdById: admin.id,
zipPath: zip.filePath,
});
const documents = await Document.findAll({
where: { apiImportId: importId },
order: [["title", "ASC"]],
});
expect(documents.length).toBe(2);
const docOne = documents.find((d) => d.title === "Document 1");
const docTwo = documents.find((d) => d.title === "Document 2");
expect(docOne).toBeDefined();
expect(docTwo).toBeDefined();
expect(docTwo!.urlId).not.toBe(zip.documentTwoUrlId);
const linkParagraph = docOne!.content?.content?.[1];
const linkText = linkParagraph?.content?.[0];
const linkMark = linkText?.marks?.find((m) => m.type === "link");
expect(linkMark?.attrs?.href).toBe(`/doc/document-2-${docTwo!.urlId}`);
expect(linkMark?.attrs?.href).not.toContain(zip.documentTwoUrlId);
});
it("rewrites internal collection links to slugged collection paths", async () => {
const admin = await buildAdmin();
const { importId } = await runImport({
teamId: admin.teamId,
createdById: admin.id,
zipPath: zip.filePath,
});
const collection = await Collection.findOne({
where: { apiImportId: importId },
rejectOnEmpty: true,
});
const docOne = await Document.findOne({
where: { apiImportId: importId, title: "Document 1" },
rejectOnEmpty: true,
});
const linkParagraph = docOne.content?.content?.[2];
const linkText = linkParagraph?.content?.[0];
const linkMark = linkText?.marks?.find((m) => m.type === "link");
expect(linkMark?.attrs?.href).toBe(collection.path);
});
describe("user mapping", () => {
it("maps createdById to an existing user by ID", async () => {
let originalAuthor = await User.findByPk(FIXTURE_USER_ID);