Files
outline/server/queues/tasks/ImportMarkdownZipTask.ts
T
Tom Moor adbffc0734 chore: clear mechanical lint warnings (Phase 1) (#12198)
* chore: clear mechanical lint warnings

Drops 44 oxlint warnings (559 → 515) by fixing easy mechanical rules
across the codebase: no-useless-escape, no-duplicate-type-constituents,
no-redundant-type-constituents, no-unused-expressions,
no-meaningless-void-operator, require-array-sort-compare, await-thenable.

* chore: drop callback parameter from useCallback deps

The `open` argument is a parameter of the callback, not a closed-over
variable, so it doesn't belong in the deps array.

* chore: promote cleared lint rules to errors

Promotes the rules cleared in this PR from warn to error so future
violations fail the lint:

- no-unused-expressions
- typescript/await-thenable
- typescript/no-duplicate-type-constituents
- typescript/no-meaningless-void-operator
- typescript/require-array-sort-compare

Removes the override that suppressed no-useless-escape on source
files (the global rule is already error) and fixes the 21 escape
violations that this exposed in regex character classes and template
literals.

* chore: address PR review feedback

- usePinnedDocuments: simplify UrlId to plain string instead of the
  intersection trick.
- PlantUML embed: move - to end of character class so it's a literal
  hyphen rather than a range operator.
- checkboxes: type token params as Token | undefined to match the
  actual call sites that pass tokens[index - 2] etc.
2026-04-28 20:00:03 -04:00

267 lines
8.7 KiB
TypeScript

import path from "node:path";
import fs from "fs-extra";
import escapeRegExp from "lodash/escapeRegExp";
import mime from "mime-types";
import { randomUUID } from "node:crypto";
import documentImporter from "@server/commands/documentImporter";
import { createContext } from "@server/context";
import Logger from "@server/logging/Logger";
import type { FileOperation } from "@server/models";
import { User } from "@server/models";
import { Buckets } from "@server/models/helpers/AttachmentHelper";
import { sequelize } from "@server/storage/database";
import type { FileTreeNode } from "@server/utils/ImportHelper";
import ImportHelper from "@server/utils/ImportHelper";
import type { StructuredImportData } from "./ImportTask";
import ImportTask from "./ImportTask";
/**
 * Import task that turns an extracted zip of markdown files into collections,
 * documents and attachments.
 */
export default class ImportMarkdownZipTask extends ImportTask {
  /**
   * Build the structured import data from the directory a zip was extracted to.
   *
   * @param dirPath Path of the directory to read.
   * @param fileOperation The file operation this import belongs to.
   * @returns The collections, documents and attachments found in the directory.
   * @throws Error if no file tree could be built from the directory.
   */
  public async parseData(
    dirPath: string,
    fileOperation: FileOperation
  ): Promise<StructuredImportData> {
    const tree = await ImportHelper.toFileTree(dirPath);
    if (!tree) {
      throw new Error("Could not find valid content in zip file");
    }
    return this.parseFileTree(fileOperation, tree.children);
  }

  /**
   * Check if a folder contains only attachment files (no markdown documents).
   *
   * @param node The file tree node to check
   * @returns true if the folder contains only non-markdown files
   */
  private isAttachmentFolder(node: FileTreeNode): boolean {
    if (node.children.length === 0) {
      return false;
    }

    return node.children.every((child) => {
      // If child has children, it's a folder - recurse to check its contents
      if (child.children.length > 0) {
        return this.isAttachmentFolder(child);
      }

      // Child has no children - could be a file or empty folder
      const ext = path.extname(child.name).toLowerCase();

      // If no extension, it's likely an empty folder, not a file.
      // Be conservative and don't treat it as an attachment.
      if (!ext) {
        return false;
      }

      // It's a file with an extension - check if it's NOT markdown
      return ext !== ".md" && ext !== ".markdown";
    });
  }

  /**
   * Recursively process all files in a folder as attachments.
   *
   * @param node The file tree node to process
   * @param output The structured import data to add attachments to
   */
  private parseAttachmentFolder(
    node: FileTreeNode,
    output: StructuredImportData
  ): void {
    for (const child of node.children) {
      if (child.children.length > 0) {
        this.parseAttachmentFolder(child, output);
      } else {
        const id = randomUUID();
        output.attachments.push({
          id,
          name: child.name,
          path: child.path,
          mimeType: mime.lookup(child.path) || "application/octet-stream",
          // Deferred so the file is only read when the buffer is requested.
          buffer: () => fs.readFile(child.path),
        });
      }
    }
  }

  /**
   * Pre-compile, once per attachment, the patterns used to find references to
   * that attachment inside document text. The patterns only depend on the
   * attachment, so building them here avoids recompiling them for every
   * document.
   *
   * @param attachments The attachments collected from the file tree
   * @returns For each attachment its placeholder and the patterns to replace,
   * in the order they must be applied
   */
  private buildAttachmentReplacements(
    attachments: StructuredImportData["attachments"]
  ): Array<{ reference: string; patterns: RegExp[] }> {
    const uploadsPrefix = new RegExp(`(.*)/${Buckets.uploads}/`);
    const publicPrefix = new RegExp(`(.*)/${Buckets.public}/`);

    return attachments.map((attachment) => {
      const encodedPath = encodeURI(attachment.path);
      const attachmentFileName = path.basename(attachment.path);

      // Pull the collection and subdirectory out of the path name, upload
      // folders in an export are relative to the document itself.
      // Support both legacy bucket names (uploads/public) and generic attachment folders.
      const normalizedAttachmentPath = encodedPath
        .replace(uploadsPrefix, `${Buckets.uploads}/`)
        .replace(publicPrefix, `${Buckets.public}/`);

      // Also try normalizing to just the folder containing the attachment
      // This handles arbitrary folder names like "attachments/"
      const attachmentDir = path.basename(path.dirname(attachment.path));
      const genericNormalizedPath = `${attachmentDir}/${encodeURI(attachmentFileName)}`;

      return {
        reference: `<<${attachment.id}>>`,
        // Order matters: the full path is replaced first, then the shorter
        // relative forms (optionally prefixed by "./" or "/").
        patterns: [
          new RegExp(escapeRegExp(encodedPath), "g"),
          new RegExp(`\\.?/?${escapeRegExp(normalizedAttachmentPath)}`, "g"),
          new RegExp(`\\.?/?${escapeRegExp(genericNormalizedPath)}`, "g"),
        ],
      };
    });
  }

  /**
   * Resolve a relative markdown link target against the directory of the
   * document that contains it.
   *
   * @param basePath Directory of the document containing the link
   * @param linkTarget The link target exactly as written in the markdown
   * @returns The normalized, URI-decoded path. If the target contains a
   * malformed percent-escape the normalized path is returned undecoded so a
   * single bad link does not abort the whole import.
   */
  private resolveLinkedDocumentPath(
    basePath: string,
    linkTarget: string
  ): string {
    const normalized = path.normalize(`${basePath}/${linkTarget}`);
    try {
      return decodeURI(normalized);
    } catch (err: unknown) {
      if (err instanceof URIError) {
        return normalized;
      }
      throw err;
    }
  }

  /**
   * Converts the file structure from zipAsFileTree into documents,
   * collections, and attachments.
   *
   * @param fileOperation The file operation
   * @param tree An array of FileTreeNode representing root files in the zip
   * @returns A StructuredImportData object
   */
  private async parseFileTree(
    fileOperation: FileOperation,
    tree: FileTreeNode[]
  ): Promise<StructuredImportData> {
    const user = await User.findByPk(fileOperation.userId, {
      rejectOnEmpty: true,
    });
    const output: StructuredImportData = {
      collections: [],
      documents: [],
      attachments: [],
    };

    // Maps the on-disk path of every imported node to the id of the document
    // it ended up in, used below to resolve links between documents.
    const docPathToIdMap = new Map<string, string>();

    const parseNodeChildren = async (
      children: FileTreeNode[],
      collectionId: string,
      parentDocumentId?: string
    ): Promise<void> => {
      await Promise.all(
        children.map(async (child) => {
          // special case for folders of attachments - detect by content
          if (child.children.length > 0 && this.isAttachmentFolder(child)) {
            this.parseAttachmentFolder(child, output);
            return;
          }

          const id = randomUUID();
          // NOTE(review): any node without children is read as a file here;
          // confirm the file tree never contains empty directories.
          const { title, icon, text } = await sequelize.transaction(
            async (transaction) =>
              documentImporter({
                mimeType: "text/markdown",
                fileName: child.name,
                content:
                  child.children.length > 0
                    ? ""
                    : await fs.readFile(child.path, "utf8"),
                user,
                ctx: createContext({ user, transaction }),
              })
          );

          // There is no await between this lookup and the insert below, so
          // siblings processed concurrently cannot both miss each other.
          const existingDocument = output.documents.find(
            (doc) =>
              doc.title === title &&
              doc.collectionId === collectionId &&
              doc.parentDocumentId === parentDocumentId
          );

          // When there is a file and a folder with the same name this handles
          // the case by combining the two into one document with nested children
          if (existingDocument) {
            docPathToIdMap.set(child.path, existingDocument.id);

            if (existingDocument.text === "") {
              existingDocument.text = text;
            }

            await parseNodeChildren(
              child.children,
              collectionId,
              existingDocument.id
            );
          } else {
            docPathToIdMap.set(child.path, id);
            output.documents.push({
              id,
              title,
              icon,
              text,
              collectionId,
              parentDocumentId,
              path: child.path,
              mimeType: "text/markdown",
            });

            await parseNodeChildren(child.children, collectionId, id);
          }
        })
      );
    };

    // All nodes in the root level should be collections
    for (const node of tree) {
      if (node.children.length > 0) {
        // Check if this is an attachments-only folder at root level
        if (this.isAttachmentFolder(node)) {
          this.parseAttachmentFolder(node, output);
          continue;
        }

        const collectionId = randomUUID();
        output.collections.push({
          id: collectionId,
          name: node.title,
        });
        await parseNodeChildren(node.children, collectionId);
      } else {
        Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
          fileOperationId: fileOperation.id,
        });
      }
    }

    const attachmentReplacements = this.buildAttachmentReplacements(
      output.attachments
    );

    for (const document of output.documents) {
      let text = document.text;

      // Check all of the attachments we've created against urls in the text
      // and replace them out with attachment redirect urls before continuing.
      for (const { reference, patterns } of attachmentReplacements) {
        for (const pattern of patterns) {
          text = text.replace(pattern, reference);
        }
      }

      const basePath = path.dirname(document.path);

      // check internal document links in the text and replace them with placeholders.
      // When persisting, the placeholders will be replaced with the right urls.
      const internalLinks = [...text.matchAll(/\[[^\]]+\]\(([^)]+\.md)\)/g)];

      for (const match of internalLinks) {
        const referredDocPath = match[1];
        const referredDocId = docPathToIdMap.get(
          this.resolveLinkedDocumentPath(basePath, referredDocPath)
        );

        if (referredDocId) {
          // Only rewrite the path where it is used as a link target. A plain
          // substring replace would hit the first occurrence anywhere, e.g.
          // the visible text of "[foo.md](foo.md)" or the tail of an
          // unrelated longer path such as "sub/foo.md".
          text = text
            .split(`](${referredDocPath})`)
            .join(`](<<${referredDocId}>>)`);
        }
      }

      document.text = text;
    }

    return output;
  }
}