Files
outline/server/commands/documentImporter.test.ts
T
Tom Moor 091346dfe8 chore: Migrate to vitest (#12272)
* wip

* Remove obsolete snapshots

* simplify

* chore(test): Convert mocks to TypeScript and tighten fetch mock types

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* Remove unnecessary patches

* Migrate to msw instead of custom fetch mock

* Address PR review comments

- Split chained vi.useFakeTimers().setSystemTime() into separate calls.
- Switch test setup to dynamic imports so EventEmitter.defaultMaxListeners
  assignment runs before module init (static imports were hoisted above it).
- Drop redundant NODE_ENV guard in monkeyPatchSequelizeErrorsForJest; its
  sole caller already gates on env.isTest.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-06 21:10:51 -04:00

367 lines
11 KiB
TypeScript

import path from "node:path";
import fs from "fs-extra";
import { createContext } from "@server/context";
import Attachment from "@server/models/Attachment";
import { sequelize } from "@server/storage/database";
import { buildUser } from "@server/test/factories";
import documentImporter from "./documentImporter";
// Replace the file storage module with a mock for the tests in this file.
// NOTE(review): no factory is passed, so vitest will use a __mocks__
// implementation if one exists and an automock otherwise — confirm which
// applies for this module.
vi.mock("@server/storage/files");
describe("documentImporter", () => {
it("should convert Word Document to markdown", async () => {
  // Load the Word fixture from the shared test fixtures directory.
  const fileName = "images.docx";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath);

  // Run the import inside a transaction using the docx mime type.
  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType:
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      fileName,
      content,
      ctx,
    });
  });

  // Exactly one attachment is expected for the user's team, and the
  // resulting text should link to it.
  const attachmentCount = await Attachment.count({
    where: { teamId: user.teamId },
  });
  expect(attachmentCount).toEqual(1);
  expect(text).toContain("This is a test document for images");
  expect(text).toContain("![](/api/attachments.redirect?id=");
  expect(title).toEqual("images");
});
it("should not strip content after period in title", async () => {
  // The supplied file name contains a period but no known extension; the
  // full name is expected to be used as the title.
  const fileName = "01. test";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    "images.docx"
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath);

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType:
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toContain("This is a test document for images");
  expect(title).toEqual("01. test");
});
it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
  // Same Word fixture, but submitted with a generic binary mime type.
  const fileName = "images.docx";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath);

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "application/octet-stream",
      fileName,
      content,
      ctx,
    });
  });

  // One attachment is expected for the user's team.
  const attachmentCount = await Attachment.count({
    where: { teamId: user.teamId },
  });
  expect(attachmentCount).toEqual(1);
  expect(text).toContain("This is a test document for images");
  expect(text).toContain("![](/api/attachments.redirect?id=");
  expect(title).toEqual("images");
});
it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
  // The fixture name ends in ".txt", so the generic binary mime type cannot
  // be resolved to a Word document and the import is expected to reject.
  const user = await buildUser();
  const fileName = "normal.docx.txt";
  const content = await fs.readFile(
    path.resolve(__dirname, "..", "test", "fixtures", fileName)
  );

  // Assert on the rejection directly rather than capturing `err.message` in a
  // try/catch: this avoids reading a property off an untyped catch variable
  // and fails with a clear message if the import unexpectedly succeeds.
  // `toHaveProperty` keeps the exact-match semantics on the error message.
  await expect(
    sequelize.transaction((transaction) =>
      documentImporter({
        user,
        mimeType: "application/octet-stream",
        fileName,
        content,
        ctx: createContext({ user, transaction }),
      })
    )
  ).rejects.toHaveProperty(
    "message",
    "File type application/octet-stream not supported"
  );
});
it("should convert Word Document on Windows to markdown", async () => {
  // NOTE(review): the inputs here (fixture and "application/octet-stream"
  // mime type) are the same as in the octet-stream test in this file —
  // confirm whether a Windows-specific input was intended.
  const fileName = "images.docx";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath);

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "application/octet-stream",
      fileName,
      content,
      ctx,
    });
  });

  // One attachment is expected for the user's team.
  const attachmentCount = await Attachment.count({
    where: { teamId: user.teamId },
  });
  expect(attachmentCount).toEqual(1);
  expect(text).toContain("This is a test document for images");
  expect(text).toContain("![](/api/attachments.redirect?id=");
  expect(title).toEqual("images");
});
it("should convert HTML Document to markdown", async () => {
  // Read the HTML fixture as a UTF-8 string.
  const fileName = "webpage.html";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath, "utf8");

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/html",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toContain("Text paragraph");
  expect(title).toEqual("Heading 1");
});
it("should convert Confluence Word output to markdown", async () => {
  // The .doc fixture is imported with the legacy Word mime type.
  const fileName = "confluence.doc";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath);

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "application/msword",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toContain("this is a test document");
  expect(title).toEqual("Heading 1");
});
it("should load markdown", async () => {
  // Read the markdown fixture as a UTF-8 string and import it as plain text.
  const fileName = "markdown.md";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath, "utf8");

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/plain",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toContain("This is a test paragraph");
  expect(title).toEqual("Heading 1");
});
it("should handle only title", async () => {
  // Inline markdown consisting solely of a heading: the heading is expected
  // to become the title, leaving an empty body.
  const fileName = "markdown.md";
  const content = "# Title";
  const user = await buildUser();

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/plain",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toEqual("");
  expect(title).toEqual("Title");
});
it("should convert frontmatter to yaml codeblock", async () => {
  // Import a markdown fixture that starts with frontmatter and check that the
  // frontmatter fields end up inside a fenced yaml code block.
  const user = await buildUser();
  const fileName = "markdown-frontmatter.md";
  const content = await fs.readFile(
    path.resolve(__dirname, "..", "test", "fixtures", fileName),
    "utf8"
  );
  const response = await sequelize.transaction((transaction) =>
    documentImporter({
      user,
      mimeType: "text/plain",
      fileName,
      content,
      ctx: createContext({ user, transaction }),
    })
  );
  expect(response.text).toContain("```yaml");
  expect(response.text).toContain("title: Test Document");
  expect(response.text).toContain("date: 2024-01-15");
  expect(response.text).toContain("tags: [test, markdown]");
  // A bare toContain("```") is already implied by the "```yaml" assertion
  // above and could never fail on its own. Count the fences instead so that
  // a missing closing fence is actually detected.
  const fenceCount = response.text.split("```").length - 1;
  expect(fenceCount).toBeGreaterThanOrEqual(2);
  expect(response.text).toContain("This is content after frontmatter");
  expect(response.title).toEqual("Heading 1");
});
it("should fallback to extension if mimetype unknown", async () => {
  // The mime type is not a recognised one, so the import is expected to
  // succeed based on the ".md" file extension.
  const fileName = "markdown.md";
  const fixturePath = path.resolve(
    __dirname,
    "..",
    "test",
    "fixtures",
    fileName
  );
  const user = await buildUser();
  const content = await fs.readFile(fixturePath, "utf8");

  const { text, title } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "application/lol",
      fileName,
      content,
      ctx,
    });
  });

  expect(text).toContain("This is a test paragraph");
  expect(title).toEqual("Heading 1");
});
it("should error with unknown file type", async () => {
  // Neither the mime type nor the ".zip" extension is importable, so the
  // import is expected to reject.
  const user = await buildUser();
  const fileName = "empty.zip";
  const content = await fs.readFile(
    path.resolve(__dirname, "..", "test", "fixtures", fileName)
  );

  // Assert on the rejection directly rather than capturing `err.message` in a
  // try/catch: this avoids reading a property off an untyped catch variable
  // and fails with a clear message if the import unexpectedly succeeds.
  // `toHaveProperty` keeps the exact-match semantics on the error message.
  await expect(
    sequelize.transaction((transaction) =>
      documentImporter({
        user,
        mimeType: "executable/zip",
        fileName,
        content,
        ctx: createContext({ user, transaction }),
      })
    )
  ).rejects.toHaveProperty("message", "File type executable/zip not supported");
});
it("should preserve dollar signs in HTML input", async () => {
  // Inline HTML whose only body content is a paragraph with a dollar amount.
  const fileName = "test.html";
  const content = `
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<p>$100</p>
</body>
</html>
`;
  const user = await buildUser();

  const { text } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/html",
      fileName,
      content,
      ctx,
    });
  });

  // The dollar sign must come through without any escaping.
  expect(text).toEqual("$100");
});
it("should not escape dollar signs in inline code in HTML input", async () => {
  // Inline HTML whose body is a single <code> element containing a dollar sign.
  const fileName = "test.html";
  const content = `
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<code>echo $foo</code>
</body>
</html>
`;
  const user = await buildUser();

  const { text } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/html",
      fileName,
      content,
      ctx,
    });
  });

  // The result should be an inline code span with the dollar sign untouched.
  expect(text).toEqual("`echo $foo`");
});
it("should not escape dollar signs in code blocks in HTML input", async () => {
  const fileName = "test.html";
  // The wrapper uses the .code-block class, which the schema recognizes for
  // code blocks.
  const content = `
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="code-block" data-language="javascript"><pre><code>echo $foo
echo $bar</code></pre></div>
</body>
</html>
`;
  const user = await buildUser();

  const { text } = await sequelize.transaction((transaction) => {
    const ctx = createContext({ user, transaction });
    return documentImporter({
      user,
      mimeType: "text/html",
      fileName,
      content,
      ctx,
    });
  });

  // The result should be a fenced javascript block with both dollar signs
  // left unescaped.
  expect(text).toEqual("```javascript\necho $foo\necho $bar\n```");
});
});