Convert markdown frontmatter to YAML codeblocks on import (#11420)

* Initial plan

* Add frontmatter to YAML codeblock conversion

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

* Add edge case tests and fix frontmatter regex, install types

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

* Address code review feedback - improve template literal readability

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>
This commit is contained in:
Copilot
2026-02-12 18:32:15 -05:00
committed by GitHub
parent 3a875d4466
commit c382e1233b
6 changed files with 208 additions and 11 deletions
+1
View File
@@ -296,6 +296,7 @@
"@types/invariant": "^2.2.37",
"@types/ioredis-mock": "^8.2.6",
"@types/jest": "^29.5.14",
"@types/js-yaml": "^4.0.9",
"@types/jsonwebtoken": "^8.5.9",
"@types/katex": "^0.16.7",
"@types/koa": "^2.15.0",
+26
View File
@@ -213,6 +213,32 @@ describe("documentImporter", () => {
expect(response.title).toEqual("Title");
});
it("should convert frontmatter to yaml codeblock", async () => {
  // Import a fixture whose first lines are a `---` delimited frontmatter block
  // followed by regular markdown headings and paragraphs.
  const user = await buildUser();
  const fileName = "markdown-frontmatter.md";
  const content = await fs.readFile(
    path.resolve(__dirname, "..", "test", "fixtures", fileName),
    "utf8"
  );
  const response = await sequelize.transaction((transaction) =>
    documentImporter({
      user,
      mimeType: "text/plain",
      fileName,
      content,
      ctx: createContext({ user, transaction }),
    })
  );

  // The frontmatter keys must survive inside a yaml-tagged fence.
  expect(response.text).toContain("```yaml");
  expect(response.text).toContain("title: Test Document");
  expect(response.text).toContain("date: 2024-01-15");
  expect(response.text).toContain("tags: [test, markdown]");

  // A bare "```" substring check is already implied by the "```yaml" check
  // above, so count fences instead to prove the block is opened AND closed.
  const fenceCount = response.text.match(/```/g)?.length ?? 0;
  expect(fenceCount).toBeGreaterThanOrEqual(2);

  // Body content is kept and the H1 (not the frontmatter title) becomes the
  // document title.
  expect(response.text).toContain("This is content after frontmatter");
  expect(response.title).toEqual("Heading 1");
});
it("should fallback to extension if mimetype unknown", async () => {
const user = await buildUser();
const fileName = "markdown.md";
+14
View File
@@ -0,0 +1,14 @@
---
title: Test Document
date: 2024-01-15
tags: [test, markdown]
author: John Doe
---
# Heading 1
This is content after frontmatter.
## Heading 2
More content here.
+108
View File
@@ -148,6 +148,114 @@ Jane,24,`;
expect(result.title).toEqual("");
expect(result.text).toContain("Subtitle");
});
it("should convert frontmatter to yaml codeblock", async () => {
  const md = `---
title: Test Document
date: 2024-01-15
tags: [test, markdown]
---
# My Title
Content after frontmatter`;
  const result = await DocumentConverter.convert(
    md,
    "test.md",
    "text/markdown"
  );

  // Frontmatter should be converted to a YAML codeblock
  expect(result.text).toContain("```yaml");
  expect(result.text).toContain("title: Test Document");
  expect(result.text).toContain("date: 2024-01-15");
  expect(result.text).toContain("tags: [test, markdown]");

  // A bare "```" substring check is already implied by the "```yaml" check
  // above, so count fences instead to prove the block is opened AND closed.
  const fenceCount = result.text.match(/```/g)?.length ?? 0;
  expect(fenceCount).toBeGreaterThanOrEqual(2);

  // Content should still be present
  expect(result.text).toContain("Content after frontmatter");

  // H1 should be extracted as title
  expect(result.title).toEqual("My Title");
});
it("should handle markdown without frontmatter", async () => {
  // Plain markdown: an H1 followed by a paragraph, with no leading `---` block.
  const source = ["# Title", "", "Regular content"].join("\n");

  const { title, text } = await DocumentConverter.convert(
    source,
    "test.md",
    "text/markdown"
  );

  // The heading is lifted into the title, the paragraph is kept, and no yaml
  // fence is introduced.
  expect(title).toEqual("Title");
  expect(text).toContain("Regular content");
  expect(text).not.toContain("```yaml");
});
it("should handle frontmatter with no content after", async () => {
  // The closing delimiter is the very last line of the input (no trailing
  // newline and no body).
  const md = `---
title: Only Frontmatter
---`;
  const result = await DocumentConverter.convert(
    md,
    "test.md",
    "text/markdown"
  );

  expect(result.text).toContain("```yaml");
  expect(result.text).toContain("title: Only Frontmatter");

  // A bare "```" substring check is already implied by the "```yaml" check
  // above, so count fences instead to prove the block is opened AND closed.
  const fenceCount = result.text.match(/```/g)?.length ?? 0;
  expect(fenceCount).toBeGreaterThanOrEqual(2);

  // With no heading in the document there is nothing to use as a title.
  expect(result.title).toEqual("");
});
it("should not convert incomplete frontmatter", async () => {
  // An opening `---` that is never closed by a second `---` line.
  const source = [
    "---",
    "title: Test",
    "Content without closing delimiter",
  ].join("\n");

  const { text } = await DocumentConverter.convert(
    source,
    "test.md",
    "text/markdown"
  );

  // Without a closing delimiter this is not frontmatter, so no yaml fence is
  // added and the original line is still present in the output.
  expect(text).not.toContain("```yaml");
  expect(text).toContain("title: Test");
});
it("should not convert frontmatter if not at start", async () => {
  // A `---` delimited section that appears after other content.
  const source = [
    "# Title",
    "Some content",
    "---",
    "title: Test",
    "---",
    "More content",
  ].join("\n");

  const { text } = await DocumentConverter.convert(
    source,
    "test.md",
    "text/markdown"
  );

  // Only a block on the first line of the document counts as frontmatter.
  expect(text).not.toContain("```yaml");
});
it("should handle invalid YAML in frontmatter", async () => {
  // Correctly delimited block whose body is meant to be rejected by the YAML
  // parser.
  const source = [
    "---",
    "invalid: yaml: content: here",
    "---",
    "Content",
  ].join("\n");

  const { text } = await DocumentConverter.convert(
    source,
    "test.md",
    "text/markdown"
  );

  // A block that fails YAML validation must be left unconverted.
  expect(text).not.toContain("```yaml");
});
});
});
+51 -11
View File
@@ -5,6 +5,7 @@ import { simpleParser } from "mailparser";
import mammoth from "mammoth";
import type { Node } from "prosemirror-model";
import { DOMParser as ProsemirrorDOMParser } from "prosemirror-model";
import yaml from "js-yaml";
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
import { schema, serializer } from "@server/editor";
import { FileImportError } from "@server/errors";
@@ -201,24 +202,30 @@ export class DocumentConverter {
fileName: string,
mimeType: string
): Promise<string> {
let markdown: string;
switch (mimeType) {
case "text/plain":
case "text/markdown":
return this.bufferToString(content);
markdown = this.bufferToString(content);
break;
case "text/csv":
return this.csvToMarkdown(content);
default:
break;
default: {
const extension = fileName.split(".").pop();
switch (extension) {
case "md":
case "markdown":
markdown = this.bufferToString(content);
break;
default:
throw FileImportError(`File type ${mimeType} not supported`);
}
}
}
const extension = fileName.split(".").pop();
switch (extension) {
case "md":
case "markdown":
return this.bufferToString(content);
default:
throw FileImportError(`File type ${mimeType} not supported`);
}
// Process frontmatter and convert it to a YAML codeblock
return this.processFrontmatter(markdown);
}
/**
@@ -404,4 +411,37 @@ export class DocumentConverter {
/**
 * Normalise file content to a string.
 *
 * @param content The content, either already a string or a Buffer.
 * @returns The string unchanged, or the Buffer decoded as UTF-8.
 */
private static bufferToString(content: Buffer | string): string {
  if (typeof content === "string") {
    return content;
  }

  return content.toString("utf8");
}
/**
 * Parse and convert leading frontmatter to a YAML codeblock.
 *
 * A block is only treated as frontmatter when it opens with `---` on the very
 * first line, is closed by a later `---` line, and its body parses as a YAML
 * mapping. Both LF and CRLF line endings are accepted. In every other case the
 * content is returned unchanged.
 *
 * @param content The markdown content that may contain frontmatter.
 * @returns The markdown content with frontmatter converted to a YAML
 * codeblock, or the original content when no valid frontmatter is found.
 */
private static processFrontmatter(content: string): string {
  // Frontmatter must start at the beginning of the document. `\r?\n` allows
  // files saved with Windows line endings to match as well; the lazy capture
  // leaves the `\r` of the final line outside of the captured body.
  const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    return content;
  }

  const frontmatterContent = match[1];
  const remainingContent = content.slice(match[0].length);

  // Validate that the frontmatter is valid YAML.
  // NOTE(review): this parses user-supplied input; js-yaml v4 `load` uses the
  // safe default schema — confirm the installed major version.
  let parsed: unknown;
  try {
    parsed = yaml.load(frontmatterContent);
  } catch {
    // If it's not valid YAML, return content unchanged
    return content;
  }

  // Frontmatter is a set of key/value pairs. Plain text between two `---`
  // lines (a thematic break, a paragraph, another thematic break) also parses
  // as YAML — as a scalar — and must not be rewritten into a codeblock.
  if (
    parsed === null ||
    typeof parsed !== "object" ||
    Array.isArray(parsed)
  ) {
    return content;
  }

  // Convert frontmatter to a YAML codeblock
  const codeBlockDelimiter = "```";
  const yamlCodeblock = `${codeBlockDelimiter}yaml\n${frontmatterContent}\n${codeBlockDelimiter}\n\n`;

  return yamlCodeblock + remainingContent;
}
}
+8
View File
@@ -8119,6 +8119,13 @@ __metadata:
languageName: node
linkType: hard
"@types/js-yaml@npm:^4.0.9":
version: 4.0.9
resolution: "@types/js-yaml@npm:4.0.9"
checksum: 10c0/24de857aa8d61526bbfbbaa383aa538283ad17363fcd5bb5148e2c7f604547db36646440e739d78241ed008702a8920665d1add5618687b6743858fae00da211
languageName: node
linkType: hard
"@types/jsdom@npm:^20.0.0":
version: 20.0.1
resolution: "@types/jsdom@npm:20.0.1"
@@ -17337,6 +17344,7 @@ __metadata:
"@types/invariant": "npm:^2.2.37"
"@types/ioredis-mock": "npm:^8.2.6"
"@types/jest": "npm:^29.5.14"
"@types/js-yaml": "npm:^4.0.9"
"@types/jsonwebtoken": "npm:^8.5.9"
"@types/katex": "npm:^0.16.7"
"@types/koa": "npm:^2.15.0"