Compare commits

...

2 Commits

Author SHA1 Message Date
codegen-sh[bot] cd8a30c476 Fix test content to avoid markdown heading override
- Change test content from '# Test content' to 'Test content without heading'
- This prevents documentImporter from extracting the title from the content instead of from the filename
- Ensures tests properly validate the filename-based title extraction fix
2025-07-21 11:53:58 +00:00
codegen-sh[bot] 167fb4d7ff Fix ZIP import folder name truncation issue
- Replace overly broad regex in documentImporter.ts that was incorrectly treating dots in folder names as file extensions
- Change from /\.[^/.]+$/ to /\.(docx|html|md|markdown|csv|txt)$/i to only match known file extensions
- Preserve folder names like '01. Introduction' instead of truncating to '01'
- Add comprehensive tests covering folder names with dots, file extension removal, and edge cases
- Fixes #9694
2025-07-21 11:33:36 +00:00
2 changed files with 130 additions and 1 deletion
+128
View File
@@ -314,4 +314,132 @@ describe("documentImporter", () => {
);
expect(response.text).toEqual("```\necho $foo\n```");
});
describe("filename title extraction", () => {
  // The body intentionally carries no markdown heading: per the accompanying
  // change notes, a heading in the content would be used as the title instead
  // of the file name, which would defeat these checks.
  const HEADINGLESS_CONTENT = "Test content without heading";

  /**
   * Imports every given file name as markdown on behalf of a freshly built
   * user and asserts that the resulting title equals the expected one.
   *
   * Cases are processed one after another, each import running inside its
   * own transaction.
   *
   * @param expectations list of `[fileName, expectedTitle]` pairs.
   */
  const expectTitles = async (expectations) => {
    const user = await buildUser();

    for (const [fileName, expectedTitle] of expectations) {
      const response = await sequelize.transaction((transaction) =>
        documentImporter({
          user,
          mimeType: "text/markdown",
          fileName,
          content: HEADINGLESS_CONTENT,
          ctx: createContext({ user, transaction }),
        })
      );

      expect(response.title).toEqual(expectedTitle);
    }
  };

  it("should preserve folder names with dots", async () => {
    // A dot followed by arbitrary text is not an extension and must survive.
    await expectTitles([
      ["01. Introduction", "01. Introduction"],
      ["02. Getting Started", "02. Getting Started"],
      ["Chapter 1. Overview", "Chapter 1. Overview"],
      ["Section 3.1 Details", "Section 3.1 Details"],
    ]);
  });

  it("should remove known file extensions", async () => {
    await expectTitles([
      ["document.md", "document"],
      ["file.markdown", "file"],
      ["spreadsheet.csv", "spreadsheet"],
      ["webpage.html", "webpage"],
      ["word-doc.docx", "word-doc"],
      ["notes.txt", "notes"],
    ]);
  });

  it("should handle files with multiple dots correctly", async () => {
    // Only the trailing known extension is dropped; inner dots are kept.
    await expectTitles([
      ["file.with.dots.md", "file.with.dots"],
      ["version.1.2.html", "version.1.2"],
      ["data.backup.csv", "data.backup"],
      ["my.document.v2.docx", "my.document.v2"],
    ]);
  });

  it("should preserve files without known extensions", async () => {
    await expectTitles([
      ["README", "README"],
      ["file.unknown", "file.unknown"],
      ["script.py", "script.py"],
      ["config.json", "config.json"],
    ]);
  });

  it("should handle case-insensitive extensions", async () => {
    await expectTitles([
      ["document.MD", "document"],
      ["file.HTML", "file"],
      ["data.CSV", "data"],
      ["word.DOCX", "word"],
    ]);
  });
});
});
+2 -1
View File
@@ -35,7 +35,8 @@ async function documentImporter({
fileName,
mimeType
);
let title = fileName.replace(/\.[^/.]+$/, "");
// Only remove known file extensions, preserve dots that are part of the filename/folder name
let title = fileName.replace(/\.(docx|html|md|markdown|csv|txt)$/i, "");
// find and extract emoji near the beginning of the document.
const regex = emojiRegex();