mirror of
https://github.com/outline/outline.git
synced 2026-06-13 03:14:59 +03:00
Convert markdown frontmatter to YAML codeblocks on import (#11420)
* Initial plan

* Add frontmatter to YAML codeblock conversion

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

* Add edge case tests and fix frontmatter regex, install types

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

* Address code review feedback - improve template literal readability

Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: tommoor <380914+tommoor@users.noreply.github.com>
This commit is contained in:
@@ -296,6 +296,7 @@
|
||||
"@types/invariant": "^2.2.37",
|
||||
"@types/ioredis-mock": "^8.2.6",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
"@types/jsonwebtoken": "^8.5.9",
|
||||
"@types/katex": "^0.16.7",
|
||||
"@types/koa": "^2.15.0",
|
||||
|
||||
@@ -213,6 +213,32 @@ describe("documentImporter", () => {
|
||||
expect(response.title).toEqual("Title");
|
||||
});
|
||||
|
||||
it("should convert frontmatter to yaml codeblock", async () => {
|
||||
const user = await buildUser();
|
||||
const fileName = "markdown-frontmatter.md";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName),
|
||||
"utf8"
|
||||
);
|
||||
const response = await sequelize.transaction((transaction) =>
|
||||
documentImporter({
|
||||
user,
|
||||
mimeType: "text/plain",
|
||||
fileName,
|
||||
content,
|
||||
ctx: createContext({ user, transaction }),
|
||||
})
|
||||
);
|
||||
|
||||
expect(response.text).toContain("```yaml");
|
||||
expect(response.text).toContain("title: Test Document");
|
||||
expect(response.text).toContain("date: 2024-01-15");
|
||||
expect(response.text).toContain("tags: [test, markdown]");
|
||||
expect(response.text).toContain("```");
|
||||
expect(response.text).toContain("This is content after frontmatter");
|
||||
expect(response.title).toEqual("Heading 1");
|
||||
});
|
||||
|
||||
it("should fallback to extension if mimetype unknown", async () => {
|
||||
const user = await buildUser();
|
||||
const fileName = "markdown.md";
|
||||
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
---
|
||||
title: Test Document
|
||||
date: 2024-01-15
|
||||
tags: [test, markdown]
|
||||
author: John Doe
|
||||
---
|
||||
|
||||
# Heading 1
|
||||
|
||||
This is content after frontmatter.
|
||||
|
||||
## Heading 2
|
||||
|
||||
More content here.
|
||||
@@ -148,6 +148,114 @@ Jane,24,`;
|
||||
expect(result.title).toEqual("");
|
||||
expect(result.text).toContain("Subtitle");
|
||||
});
|
||||
|
||||
it("should convert frontmatter to yaml codeblock", async () => {
|
||||
const md = `---
|
||||
title: Test Document
|
||||
date: 2024-01-15
|
||||
tags: [test, markdown]
|
||||
---
|
||||
|
||||
# My Title
|
||||
|
||||
Content after frontmatter`;
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
// Frontmatter should be converted to a YAML codeblock
|
||||
expect(result.text).toContain("```yaml");
|
||||
expect(result.text).toContain("title: Test Document");
|
||||
expect(result.text).toContain("date: 2024-01-15");
|
||||
expect(result.text).toContain("tags: [test, markdown]");
|
||||
expect(result.text).toContain("```");
|
||||
// Content should still be present
|
||||
expect(result.text).toContain("Content after frontmatter");
|
||||
// H1 should be extracted as title
|
||||
expect(result.title).toEqual("My Title");
|
||||
});
|
||||
|
||||
it("should handle markdown without frontmatter", async () => {
|
||||
const md = "# Title\n\nRegular content";
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
expect(result.title).toEqual("Title");
|
||||
expect(result.text).toContain("Regular content");
|
||||
expect(result.text).not.toContain("```yaml");
|
||||
});
|
||||
|
||||
it("should handle frontmatter with no content after", async () => {
|
||||
const md = `---
|
||||
title: Only Frontmatter
|
||||
---`;
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
expect(result.text).toContain("```yaml");
|
||||
expect(result.text).toContain("title: Only Frontmatter");
|
||||
expect(result.text).toContain("```");
|
||||
expect(result.title).toEqual("");
|
||||
});
|
||||
|
||||
it("should not convert incomplete frontmatter", async () => {
|
||||
const md = `---
|
||||
title: Test
|
||||
Content without closing delimiter`;
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
// Should not convert as it's not proper frontmatter
|
||||
expect(result.text).not.toContain("```yaml");
|
||||
expect(result.text).toContain("title: Test");
|
||||
});
|
||||
|
||||
it("should not convert frontmatter if not at start", async () => {
|
||||
const md = `# Title
|
||||
|
||||
Some content
|
||||
|
||||
---
|
||||
title: Test
|
||||
---
|
||||
|
||||
More content`;
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
// Should not convert as frontmatter must be at the start
|
||||
expect(result.text).not.toContain("```yaml");
|
||||
});
|
||||
|
||||
it("should handle invalid YAML in frontmatter", async () => {
|
||||
const md = `---
|
||||
invalid: yaml: content: here
|
||||
---
|
||||
|
||||
Content`;
|
||||
const result = await DocumentConverter.convert(
|
||||
md,
|
||||
"test.md",
|
||||
"text/markdown"
|
||||
);
|
||||
|
||||
// Should not convert invalid YAML
|
||||
expect(result.text).not.toContain("```yaml");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import { simpleParser } from "mailparser";
|
||||
import mammoth from "mammoth";
|
||||
import type { Node } from "prosemirror-model";
|
||||
import { DOMParser as ProsemirrorDOMParser } from "prosemirror-model";
|
||||
import yaml from "js-yaml";
|
||||
import { ProsemirrorHelper as SharedProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
|
||||
import { schema, serializer } from "@server/editor";
|
||||
import { FileImportError } from "@server/errors";
|
||||
@@ -201,24 +202,30 @@ export class DocumentConverter {
|
||||
fileName: string,
|
||||
mimeType: string
|
||||
): Promise<string> {
|
||||
let markdown: string;
|
||||
|
||||
switch (mimeType) {
|
||||
case "text/plain":
|
||||
case "text/markdown":
|
||||
return this.bufferToString(content);
|
||||
markdown = this.bufferToString(content);
|
||||
break;
|
||||
case "text/csv":
|
||||
return this.csvToMarkdown(content);
|
||||
default:
|
||||
break;
|
||||
default: {
|
||||
const extension = fileName.split(".").pop();
|
||||
switch (extension) {
|
||||
case "md":
|
||||
case "markdown":
|
||||
markdown = this.bufferToString(content);
|
||||
break;
|
||||
default:
|
||||
throw FileImportError(`File type ${mimeType} not supported`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const extension = fileName.split(".").pop();
|
||||
switch (extension) {
|
||||
case "md":
|
||||
case "markdown":
|
||||
return this.bufferToString(content);
|
||||
default:
|
||||
throw FileImportError(`File type ${mimeType} not supported`);
|
||||
}
|
||||
// Process frontmatter and convert it to a YAML codeblock
|
||||
return this.processFrontmatter(markdown);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -404,4 +411,37 @@ export class DocumentConverter {
|
||||
private static bufferToString(content: Buffer | string): string {
  // Strings are already decoded and pass through untouched.
  if (typeof content === "string") {
    return content;
  }

  // Anything else is a Buffer, which is decoded as UTF-8 text.
  return content.toString("utf8");
}
|
||||
|
||||
/**
 * Parse and convert leading frontmatter to a fenced `yaml` codeblock.
 *
 * Frontmatter is only recognised when the document begins with a `---` line
 * that is later closed by another `---` line; both LF and CRLF line endings
 * are accepted. The content is returned unchanged when no frontmatter is
 * found, when it is not valid YAML, or when it does not parse to a key/value
 * mapping (so a thematic break followed by ordinary text is not mistaken for
 * frontmatter).
 *
 * @param content The markdown content that may contain frontmatter.
 * @returns The markdown content with frontmatter converted to a YAML codeblock.
 */
private static processFrontmatter(content: string): string {
  // Frontmatter must start at the beginning of the document. The optional
  // `\r` lets files saved with Windows line endings match as well.
  const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;
  const match = frontmatterRegex.exec(content);

  if (!match) {
    return content;
  }

  // Normalise line endings so the emitted codeblock is internally consistent.
  const frontmatterContent = match[1].replace(/\r\n/g, "\n");
  const remainingContent = content.slice(match[0].length);

  // Validate that the frontmatter is valid YAML.
  // NOTE(review): this parses untrusted, user-supplied text. js-yaml v4's
  // `load` uses a safe default schema; confirm the installed major version.
  let parsed: unknown;
  try {
    parsed = yaml.load(frontmatterContent);
  } catch {
    // If it's not valid YAML, return content unchanged
    return content;
  }

  // Plain text between two `---` lines is valid YAML (a scalar) but is not
  // frontmatter, so only key/value mappings are converted.
  const isMapping =
    typeof parsed === "object" &&
    parsed !== null &&
    !Array.isArray(parsed) &&
    !(parsed instanceof Date);

  if (!isMapping) {
    return content;
  }

  // Use a fence longer than any backtick run inside the frontmatter so a value
  // containing ``` cannot terminate the codeblock early (minimum of three).
  const longestBacktickRun = (frontmatterContent.match(/`+/g) ?? []).reduce(
    (longest, run) => Math.max(longest, run.length),
    0
  );
  const codeBlockDelimiter = "`".repeat(Math.max(3, longestBacktickRun + 1));

  // Convert frontmatter to a YAML codeblock
  const yamlCodeblock = `${codeBlockDelimiter}yaml\n${frontmatterContent}\n${codeBlockDelimiter}\n\n`;

  return yamlCodeblock + remainingContent;
}
|
||||
}
|
||||
|
||||
@@ -8119,6 +8119,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/js-yaml@npm:^4.0.9":
|
||||
version: 4.0.9
|
||||
resolution: "@types/js-yaml@npm:4.0.9"
|
||||
checksum: 10c0/24de857aa8d61526bbfbbaa383aa538283ad17363fcd5bb5148e2c7f604547db36646440e739d78241ed008702a8920665d1add5618687b6743858fae00da211
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/jsdom@npm:^20.0.0":
|
||||
version: 20.0.1
|
||||
resolution: "@types/jsdom@npm:20.0.1"
|
||||
@@ -17337,6 +17344,7 @@ __metadata:
|
||||
"@types/invariant": "npm:^2.2.37"
|
||||
"@types/ioredis-mock": "npm:^8.2.6"
|
||||
"@types/jest": "npm:^29.5.14"
|
||||
"@types/js-yaml": "npm:^4.0.9"
|
||||
"@types/jsonwebtoken": "npm:^8.5.9"
|
||||
"@types/katex": "npm:^0.16.7"
|
||||
"@types/koa": "npm:^2.15.0"
|
||||
|
||||
Reference in New Issue
Block a user