Compare commits

...

1 Commits

Author SHA1 Message Date
Tom Moor 1157597051 fix: Markdown escape characters left in title on import 2024-12-11 23:24:22 -05:00
5 changed files with 54 additions and 37 deletions
+1 -5
View File
@@ -1,5 +1,4 @@
import emojiRegex from "emoji-regex";
import escapeRegExp from "lodash/escapeRegExp";
import truncate from "lodash/truncate";
import parseTitle from "@shared/utils/parseTitle";
import { DocumentValidation } from "@shared/validations";
@@ -51,10 +50,7 @@ async function documentImporter({
if (text.trim().startsWith("# ")) {
const result = parseTitle(text);
title = result.title;
text = text
.trim()
.replace(new RegExp(`#\\s+${escapeRegExp(title)}`), "")
.trimStart();
text = text.replace(/^.+(\n|$)/, "");
}
// Replace any <br> generated by the turndown plugin with escaped newlines
+2 -31
View File
@@ -1,5 +1,6 @@
import { taskListItems, strikethrough } from "@joplin/turndown-plugin-gfm";
import TurndownService from "turndown";
import { escape } from "@shared/utils/markdown";
import breaks from "./breaks";
import emptyLists from "./emptyLists";
import emptyParagraph from "./emptyParagraph";
@@ -41,36 +42,6 @@ const service = new TurndownService({
.use(breaks)
.use(emptyLists);
/**
 * Ordered [pattern, replacement] pairs used to escape Markdown syntax.
 * Rules run in array order; the backslash rule runs first, before the later
 * rules insert backslashes of their own. Patterns anchored with `^` carry no
 * `m` flag, so they only match at the very start of the string being escaped.
 */
const escapes: [RegExp, string][] = [
  [/\\/g, "\\\\"],
  [/\*/g, "\\*"],
  [/^-/g, "\\-"],
  [/^\+ /g, "\\+ "],
  [/^(=+)/g, "\\$1"],
  [/^(#{1,6}) /g, "\\$1 "],
  [/`/g, "\\`"],
  [/^~~~/g, "\\~~~"],
  [/\[/g, "\\["],
  [/\]/g, "\\]"],
  [/\(/g, "\\("], // OLN-91
  [/\)/g, "\\)"], // OLN-91
  [/^>/g, "\\>"],
  [/_/g, "\\_"],
  [/^(\d+)\. /g, "$1\\. "],
  [/\$/g, "\\$"],
];

/**
 * Replaces the service's Markdown escaping routine, following the override
 * hook documented here:
 * https://github.com/mixmark-io/turndown/blob/4499b5c313d30a3189a58fdd74fc4ed4b2428afd/README.md#overriding-turndownserviceprototypeescape
 *
 * @param text The string to escape
 * @returns A string with Markdown syntax escaped
 */
service.escape = function (text) {
  let escaped = text;
  // Apply every rule in table order, feeding each result into the next rule.
  for (const [pattern, replacement] of escapes) {
    escaped = escaped.replace(pattern, replacement);
  }
  return escaped;
};
// Use the `escape` helper imported from "@shared/utils/markdown" as the
// Turndown service's escaping routine.
service.escape = escape;
export default service;
+40
View File
@@ -0,0 +1,40 @@
/**
 * Ordered [pattern, replacement] pairs used to escape Markdown syntax.
 * Rules run in array order; the backslash rule runs first, before the later
 * rules insert backslashes of their own. Patterns anchored with `^` carry no
 * `m` flag, so they only match at the very start of the string being escaped.
 */
const escapes: [RegExp, string][] = [
  [/\\/g, "\\\\"],
  [/\*/g, "\\*"],
  [/^-/g, "\\-"],
  [/^\+ /g, "\\+ "],
  [/^(=+)/g, "\\$1"],
  [/^(#{1,6}) /g, "\\$1 "],
  [/`/g, "\\`"],
  [/^~~~/g, "\\~~~"],
  [/\[/g, "\\["],
  [/\]/g, "\\]"],
  [/\(/g, "\\("], // OLN-91
  [/\)/g, "\\)"], // OLN-91
  [/^>/g, "\\>"],
  [/_/g, "\\_"],
  [/^(\d+)\. /g, "$1\\. "],
  [/\$/g, "\\$"],
];

/**
 * Escape markdown characters in a string by running it through every rule of
 * the `escapes` table, in order.
 *
 * @param text - The text to escape
 * @returns The escaped text
 */
export const escape = function (text: string) {
  let escaped = text;
  // Each rule operates on the output of the previous one.
  for (const [pattern, replacement] of escapes) {
    escaped = escaped.replace(pattern, replacement);
  }
  return escaped;
};
/**
 * Unescape markdown characters in a string.
 *
 * Drops the backslash in front of any backslash-escaped ASCII punctuation
 * character, which is the set Markdown (CommonMark) allows to be escaped. A
 * backslash followed by any other character (letters, digits, whitespace) is
 * not an escape sequence and is left untouched.
 *
 * @param text - The text to unescape
 * @returns The unescaped text
 */
export const unescape = function (text: string) {
  // Matches are consumed left to right, so an escaped backslash ("\\\\")
  // becomes a single backslash and is never re-read as the start of another
  // escape sequence.
  return text.replace(/\\([!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])/g, "$1");
};
+9
View File
@@ -3,14 +3,23 @@ import parseTitle from "./parseTitle";
// Whitespace around the heading text is stripped from the title.
it("should trim the title", () => {
expect(parseTitle(`# Lots of space `).title).toBe("Lots of space");
});
// Backslash escapes in the heading (here an escaped "." after digits) must
// not appear in the parsed title.
it("should remove escaped characters", () => {
expect(parseTitle(`# 1\\. Title`).title).toBe("1. Title");
expect(parseTitle(`# 12\\. Title`).title).toBe("12. Title");
expect(parseTitle(`# 1\\.2 Title`).title).toBe("1.2 Title");
});
// Only the first line of the input contributes to the title.
it("should extract first title", () => {
expect(parseTitle(`# Title one\n# Title two`).title).toBe("Title one");
});
// A leading emoji is reported via `emoji` and also stays part of `title`.
it("should parse emoji if first character", () => {
const parsed = parseTitle(`# 😀 Title`);
expect(parsed.title).toBe("😀 Title");
expect(parsed.emoji).toBe("😀");
});
it("should not parse emoji if not first character", () => {
const parsed = parseTitle(`# Title 🌈`);
expect(parsed.title).toBe("Title 🌈");
+2 -1
View File
@@ -1,11 +1,12 @@
import emojiRegex from "emoji-regex";
import { unescape } from "./markdown";
export default function parseTitle(text = "") {
const regex = emojiRegex();
// find and extract title
const firstLine = text.trim().split(/\r?\n/)[0];
const title = firstLine.replace(/^#/, "").trim();
const title = unescape(firstLine.replace(/^#/, "").trim());
// find and extract first emoji
const matches = regex.exec(title);