diff --git a/server/utils/ProsemirrorHelper.test.ts b/server/utils/ProsemirrorHelper.test.ts
index 36a20bc5a2..87402ca4ad 100644
--- a/server/utils/ProsemirrorHelper.test.ts
+++ b/server/utils/ProsemirrorHelper.test.ts
@@ -2,7 +2,7 @@ import crypto from "node:crypto";
import { Node } from "prosemirror-model";
import { ProsemirrorHelper as ServerProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
import { ProsemirrorHelper } from "@shared/utils/ProsemirrorHelper";
-import { schema } from "@server/editor";
+import { parser, schema, serializer } from "@server/editor";
// Note: The test is here rather than shared to access the schema
describe("#ProsemirrorHelper", () => {
@@ -184,6 +184,115 @@ describe("#ProsemirrorHelper", () => {
});
});
+ describe("table markdown round trip", () => {
+ const roundTrip = (md: string) => {
+ const doc = parser.parse(md);
+ expect(doc).not.toBeNull();
+ const first = serializer.serialize(doc!);
+ const second = serializer.serialize(parser.parse(first)!);
+ return { first, second };
+ };
+
+ const getCellTexts = (md: string) => {
+ const doc = parser.parse(md)!;
+ const table = doc.content.firstChild!;
+ expect(table.type.name).toBe("table");
+ const rows: string[][] = [];
+ table.forEach((row) => {
+ const cells: string[] = [];
+ row.forEach((cell) => cells.push(cell.textContent));
+ rows.push(cells);
+ });
+ return rows;
+ };
+
+ it("preserves a single inline code span containing pipes", () => {
+ const cells = getCellTexts(
+ ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
+ );
+
+ expect(cells).toEqual([
+ ["A", "B"],
+ ["x", "|y|"],
+ ]);
+ });
+
+ it("preserves multiple inline code spans with pipes in the same cell", () => {
+ const md = [
+ "| Condition | Facts |",
+ "| --- | --- |",
+ "| Absolute time difference | The system checks `|Clock_NTP_Camera1 - Clock_GPS_Camera1|` and `|Clock_NTP_Camera2 - Clock_GPS_Camera2|`. |",
+ "",
+ ].join("\n");
+
+ const cells = getCellTexts(md);
+ expect(cells).toHaveLength(2);
+ expect(cells[1][0]).toBe("Absolute time difference");
+ expect(cells[1][1]).toBe(
+ "The system checks |Clock_NTP_Camera1 - Clock_GPS_Camera1| and |Clock_NTP_Camera2 - Clock_GPS_Camera2|."
+ );
+ });
+
+ it("preserves inline math containing pipes", () => {
+ const cells = getCellTexts(
+ ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
+ );
+
+ expect(cells[1][0]).toBe("x");
+ expect(cells[1][1]).toBe("|a-b|");
+ });
+
+ it("preserves identifiers with underscores and braces inside code spans", () => {
+ const cells = getCellTexts(
+ [
+ "| Field | Value |",
+ "| --- | --- |",
+ "| ID | `foo_{bar}|baz_{qux}` |",
+ "",
+ ].join("\n")
+ );
+
+ expect(cells[1][1]).toBe("foo_{bar}|baz_{qux}");
+ });
+
+ it("re-serializes a table with code-span pipes idempotently", () => {
+ const { first, second } = roundTrip(
+ ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
+ );
+
+ expect(second).toBe(first);
+ });
+
+ it("re-serializes a table with prose plus code-span pipes idempotently", () => {
+ const { first, second } = roundTrip(
+ [
+ "| Condition | Facts |",
+ "| --- | --- |",
+ "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|`. |",
+ "",
+ ].join("\n")
+ );
+
+ expect(second).toBe(first);
+ });
+
+ it("re-serializes a table with inline math pipes idempotently", () => {
+ const { first, second } = roundTrip(
+ ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
+ );
+
+ expect(second).toBe(first);
+ });
+
+ it("still splits cells on unescaped pipes outside code spans", () => {
+ const cells = getCellTexts(
+ ["| A | B | C |", "| --- | --- | --- |", "| x | y | z |", ""].join("\n")
+ );
+
+ expect(cells[1]).toEqual(["x", "y", "z"]);
+ });
+ });
+
describe("#removeMarks", () => {
it("preserves table cell background color when removing comment marks", () => {
const doc = Node.fromJSON(schema, {
diff --git a/shared/editor/lib/markdown/serializer.ts b/shared/editor/lib/markdown/serializer.ts
index 85a1d82ad3..73be98853a 100644
--- a/shared/editor/lib/markdown/serializer.ts
+++ b/shared/editor/lib/markdown/serializer.ts
@@ -345,16 +345,13 @@ export class MarkdownSerializerState {
this.text(this.markString(add, true, parent, index), false);
}
- // Render the node. Special case code marks, since their content is not
- // escaped, apart from pipes in tables.
+ // Render the node. Special case code marks, since their content is
+ // not escaped. Pipes inside code spans are safe inside tables because
+ // the block table rule recognises code spans when splitting cells.
if (noEsc && node.isText) {
- const text = this.inTable
- ? node.text.replace(/\|/gi, "\\$&")
- : node.text;
-
this.text(
this.markString(inner, true, parent, index) +
- text +
+ node.text +
this.markString(inner, false, parent, index + 1),
false
);
diff --git a/shared/editor/rules/tables.test.ts b/shared/editor/rules/tables.test.ts
new file mode 100644
index 0000000000..482f8e6bb5
--- /dev/null
+++ b/shared/editor/rules/tables.test.ts
@@ -0,0 +1,134 @@
+import markdownit from "markdown-it";
+import tablesRule from "./tables";
+
+type Cell = { content: string };
+
+function parseRows(md: string): Cell[][] {
+ const parser = markdownit("default").use(tablesRule);
+ const tokens = parser.parse(md, {});
+
+ const rows: Cell[][] = [];
+ let row: Cell[] | null = null;
+
+ for (const token of tokens) {
+ if (token.type === "tr_open") {
+ row = [];
+ } else if (token.type === "tr_close" && row) {
+ rows.push(row);
+ row = null;
+ } else if ((token.type === "th_open" || token.type === "td_open") && row) {
+ // The next inline token holds the cell text.
+ row.push({ content: "" });
+ } else if (token.type === "inline" && row && row.length > 0) {
+ row[row.length - 1].content = token.content;
+ }
+ }
+
+ return rows;
+}
+
+describe("table rule", () => {
+ describe("pipes inside code spans", () => {
+ it("does not split a cell on pipes inside a backtick code span", () => {
+ const md = ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n");
+
+ const rows = parseRows(md);
+
+ expect(rows).toHaveLength(2);
+ expect(rows[0].map((c) => c.content)).toEqual(["A", "B"]);
+ expect(rows[1].map((c) => c.content)).toEqual(["x", "`|y|`"]);
+ });
+
+ it("preserves multiple code-spanned expressions with pipes in one cell", () => {
+ const md = [
+ "| Condition | Facts |",
+ "| --- | --- |",
+ "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|` and `|Clock_NTP2 - Clock_GPS2|`. |",
+ "",
+ ].join("\n");
+
+ const rows = parseRows(md);
+
+ expect(rows).toHaveLength(2);
+ expect(rows[1]).toHaveLength(2);
+ expect(rows[1][0].content).toBe("Absolute time difference");
+ expect(rows[1][1].content).toBe(
+ "The system checks `|Clock_NTP - Clock_GPS|` and `|Clock_NTP2 - Clock_GPS2|`."
+ );
+ });
+
+ it("handles double-backtick code spans containing single backticks and pipes", () => {
+ const md = ["| A | B |", "| --- | --- |", "| x | ``a|b`c`` |", ""].join(
+ "\n"
+ );
+
+ const rows = parseRows(md);
+
+ expect(rows[1].map((c) => c.content)).toEqual(["x", "``a|b`c``"]);
+ });
+
+ it("still treats unfenced pipes as cell separators", () => {
+ const md = [
+ "| A | B | C |",
+ "| --- | --- | --- |",
+ "| x | y | z |",
+ "",
+ ].join("\n");
+
+ const rows = parseRows(md);
+
+ expect(rows[1].map((c) => c.content)).toEqual(["x", "y", "z"]);
+ });
+
+ it("treats backslash-escaped pipes inside code spans as literal", () => {
+ const md = ["| A | B |", "| --- | --- |", "| x | `\\|y\\|` |", ""].join(
+ "\n"
+ );
+
+ const rows = parseRows(md);
+
+ // Backslash escapes are not processed inside code spans, but the
+ // pipes are inside the code span so they must not split the cell.
+ expect(rows[1]).toHaveLength(2);
+ expect(rows[1][0].content).toBe("x");
+ // The cell should still contain the original code-span text (the
+ // exact serialized form is not important here, but it must not be
+ // truncated).
+ expect(rows[1][1].content).toContain("y");
+ expect(rows[1][1].content).toContain("|");
+ });
+ });
+
+ describe("pipes inside inline math", () => {
+ it("does not split a cell on pipes inside $...$ math", () => {
+ const md = ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join(
+ "\n"
+ );
+
+ const rows = parseRows(md);
+
+ expect(rows[1]).toHaveLength(2);
+ expect(rows[1][1].content).toBe("$|a-b|$");
+ });
+
+ it("does not treat lone dollar signs as math openers", () => {
+ const md = ["| A | B |", "| --- | --- |", "| $5 | $10 |", ""].join("\n");
+
+ const rows = parseRows(md);
+
+ // Lone dollar amounts shouldn't open math spans and merge cells.
+ expect(rows[1].map((c) => c.content)).toEqual(["$5", "$10"]);
+ });
+ });
+
+ describe("backslash escapes outside code spans", () => {
+ it("still honors \\| as an escaped cell-pipe", () => {
+ const md = ["| A | B |", "| --- | --- |", "| x | a\\|b |", ""].join("\n");
+
+ const rows = parseRows(md);
+
+ expect(rows[1]).toHaveLength(2);
+ expect(rows[1][1].content).toContain("|");
+ });
+ });
+});
diff --git a/shared/editor/rules/tables.ts b/shared/editor/rules/tables.ts
index ffaaf4def2..350c38e6e7 100644
--- a/shared/editor/rules/tables.ts
+++ b/shared/editor/rules/tables.ts
@@ -1,4 +1,5 @@
import type MarkdownIt from "markdown-it";
+import type StateBlock from "markdown-it/lib/rules_block/state_block.mjs";
const BREAK_REGEX = /(?<=^|[^\\])\\n/;
const BR_TAG_REGEX = /
/gi;
@@ -6,7 +7,372 @@ const BR_TAG_REGEX = /
/gi;
// Stops at
or newline to handle multiple checkboxes in a cell
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;
+const TAB = 0x09;
+const SPACE = 0x20;
+const DOLLAR = 0x24;
+const BACKSLASH = 0x5c;
+const BACKTICK = 0x60;
+const PIPE = 0x7c;
+const HYPHEN = 0x2d;
+const COLON = 0x3a;
+const DIGIT_0 = 0x30;
+const DIGIT_9 = 0x39;
+const MAX_AUTOCOMPLETED_CELLS = 0x10000;
+
+function isSpace(code: number): boolean {
+ return code === SPACE || code === TAB;
+}
+
+/**
+ * Skip over a backtick-delimited code span starting at `pos`.
+ *
+ * @returns the position just past the closing run if a balanced span exists,
+ * otherwise -1 (indicating the backticks at `pos` are literal text).
+ */
+function skipCodeSpan(str: string, pos: number): number {
+ const max = str.length;
+ const runStart = pos;
+ while (pos < max && str.charCodeAt(pos) === BACKTICK) {
+ pos++;
+ }
+ const tickCount = pos - runStart;
+
+ let scan = pos;
+ while (scan < max) {
+ if (str.charCodeAt(scan) !== BACKTICK) {
+ scan++;
+ continue;
+ }
+ const closeStart = scan;
+ while (scan < max && str.charCodeAt(scan) === BACKTICK) {
+ scan++;
+ }
+ if (scan - closeStart === tickCount) {
+ return scan;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Skip over an inline $...$ math span starting at `pos`.
+ *
+ * Mirrors the opener/closer constraints used by the math inline rule so that
+ * literal dollar amounts (e.g. "$5") do not accidentally consume a cell.
+ *
+ * @returns the position just past the closing `$` if a balanced span exists,
+ * otherwise -1.
+ */
+function skipMathSpan(str: string, pos: number): number {
+ const max = str.length;
+ // Opener: next char must not be whitespace.
+ const next = pos + 1 < max ? str.charCodeAt(pos + 1) : -1;
+ if (next === -1 || isSpace(next)) {
+ return -1;
+ }
+
+ let scan = pos + 1;
+ while (scan < max) {
+ const ch = str.charCodeAt(scan);
+ if (ch === BACKSLASH) {
+ // Skip escaped char.
+ scan += 2;
+ continue;
+ }
+ if (ch === DOLLAR) {
+ const prev = str.charCodeAt(scan - 1);
+ const after = scan + 1 < max ? str.charCodeAt(scan + 1) : -1;
+ // Closer: prev not whitespace, next not a digit.
+ if (!isSpace(prev) && !(after >= DIGIT_0 && after <= DIGIT_9)) {
+ return scan + 1;
+ }
+ }
+ scan++;
+ }
+ return -1;
+}
+
+/**
+ * Split a table row on unescaped pipes, while ignoring pipes that fall inside
+ * inline code spans or inline math spans. This is a superset of markdown-it's
+ * default behaviour, which only honours `\|` escapes and so silently truncates
+ * cells whose content contains literal `|` characters inside `` `...` `` or
+ * `$...$`.
+ */
+function escapedSplit(str: string): string[] {
+ const result: string[] = [];
+ const max = str.length;
+
+ let pos = 0;
+ let isEscaped = false;
+ let lastPos = 0;
+ let current = "";
+
+ while (pos < max) {
+ const ch = str.charCodeAt(pos);
+
+ if (!isEscaped && ch === BACKTICK) {
+ const end = skipCodeSpan(str, pos);
+ if (end !== -1) {
+ pos = end;
+ isEscaped = false;
+ continue;
+ }
+ } else if (!isEscaped && ch === DOLLAR) {
+ const end = skipMathSpan(str, pos);
+ if (end !== -1) {
+ pos = end;
+ isEscaped = false;
+ continue;
+ }
+ }
+
+ if (ch === PIPE) {
+ if (!isEscaped) {
+ result.push(current + str.substring(lastPos, pos));
+ current = "";
+ lastPos = pos + 1;
+ } else {
+ // Escaped pipe `\|` becomes part of the current cell, dropping the
+ // leading backslash.
+ current += str.substring(lastPos, pos - 1);
+ lastPos = pos;
+ }
+ }
+
+ isEscaped = ch === BACKSLASH;
+ pos++;
+ }
+
+ result.push(current + str.substring(lastPos));
+ return result;
+}
+
+function getLine(state: StateBlock, line: number): string {
+ const pos = state.bMarks[line] + state.tShift[line];
+ const max = state.eMarks[line];
+ return state.src.slice(pos, max);
+}
+
+/**
+ * GFM table block rule, forked from markdown-it. The only behavioural change
+ * from the upstream rule is that {@link escapedSplit} also recognises
+ * backtick-delimited code spans and `$...$` math spans, so pipes inside such
+ * spans no longer split the row.
+ */
+function tableRule(
+ state: StateBlock,
+ startLine: number,
+ endLine: number,
+ silent: boolean
+): boolean {
+ if (startLine + 2 > endLine) {
+ return false;
+ }
+
+ let nextLine = startLine + 1;
+ if (state.sCount[nextLine] < state.blkIndent) {
+ return false;
+ }
+ if (state.sCount[nextLine] - state.blkIndent >= 4) {
+ return false;
+ }
+
+ let pos = state.bMarks[nextLine] + state.tShift[nextLine];
+ if (pos >= state.eMarks[nextLine]) {
+ return false;
+ }
+
+ const firstCh = state.src.charCodeAt(pos++);
+ if (firstCh !== PIPE && firstCh !== HYPHEN && firstCh !== COLON) {
+ return false;
+ }
+ if (pos >= state.eMarks[nextLine]) {
+ return false;
+ }
+
+ const secondCh = state.src.charCodeAt(pos++);
+ if (
+ secondCh !== PIPE &&
+ secondCh !== HYPHEN &&
+ secondCh !== COLON &&
+ !isSpace(secondCh)
+ ) {
+ return false;
+ }
+ if (firstCh === HYPHEN && isSpace(secondCh)) {
+ return false;
+ }
+
+ while (pos < state.eMarks[nextLine]) {
+ const ch = state.src.charCodeAt(pos);
+ if (ch !== PIPE && ch !== HYPHEN && ch !== COLON && !isSpace(ch)) {
+ return false;
+ }
+ pos++;
+ }
+
+ let lineText = getLine(state, startLine + 1);
+ let columns = lineText.split("|");
+ const aligns: string[] = [];
+ for (let i = 0; i < columns.length; i++) {
+ const t = columns[i].trim();
+ if (!t) {
+ if (i === 0 || i === columns.length - 1) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ if (!/^:?-+:?$/.test(t)) {
+ return false;
+ }
+ if (t.charCodeAt(t.length - 1) === COLON) {
+ aligns.push(t.charCodeAt(0) === COLON ? "center" : "right");
+ } else if (t.charCodeAt(0) === COLON) {
+ aligns.push("left");
+ } else {
+ aligns.push("");
+ }
+ }
+
+ lineText = getLine(state, startLine).trim();
+ if (lineText.indexOf("|") === -1) {
+ return false;
+ }
+ if (state.sCount[startLine] - state.blkIndent >= 4) {
+ return false;
+ }
+ columns = escapedSplit(lineText);
+ if (columns.length && columns[0] === "") {
+ columns.shift();
+ }
+ if (columns.length && columns[columns.length - 1] === "") {
+ columns.pop();
+ }
+
+ const columnCount = columns.length;
+ if (columnCount === 0 || columnCount !== aligns.length) {
+ return false;
+ }
+
+ if (silent) {
+ return true;
+ }
+
+ const oldParentType = state.parentType;
+ // The StateBlock type only knows the standard parent-type strings. Casting
+ // keeps us in lockstep with the upstream rule.
+ state.parentType = "table" as typeof state.parentType;
+
+ const terminatorRules = state.md.block.ruler.getRules("blockquote");
+
+ const tableTokenOpen = state.push("table_open", "table", 1);
+ const tableLines: [number, number] = [startLine, 0];
+ tableTokenOpen.map = tableLines;
+
+ const tHeadOpen = state.push("thead_open", "thead", 1);
+ tHeadOpen.map = [startLine, startLine + 1];
+
+ const headerRowOpen = state.push("tr_open", "tr", 1);
+ headerRowOpen.map = [startLine, startLine + 1];
+
+ for (let i = 0; i < columns.length; i++) {
+ const thOpen = state.push("th_open", "th", 1);
+ if (aligns[i]) {
+ thOpen.attrs = [["style", "text-align:" + aligns[i]]];
+ }
+ const inlineToken = state.push("inline", "", 0);
+ inlineToken.content = columns[i].trim();
+ inlineToken.children = [];
+ state.push("th_close", "th", -1);
+ }
+
+ state.push("tr_close", "tr", -1);
+ state.push("thead_close", "thead", -1);
+
+ let tBodyLines: [number, number] | undefined;
+ let autocompletedCells = 0;
+
+ for (nextLine = startLine + 2; nextLine < endLine; nextLine++) {
+ if (state.sCount[nextLine] < state.blkIndent) {
+ break;
+ }
+
+ let terminate = false;
+ for (let i = 0, l = terminatorRules.length; i < l; i++) {
+ if (terminatorRules[i](state, nextLine, endLine, true)) {
+ terminate = true;
+ break;
+ }
+ }
+ if (terminate) {
+ break;
+ }
+
+ lineText = getLine(state, nextLine).trim();
+ if (!lineText) {
+ break;
+ }
+ if (state.sCount[nextLine] - state.blkIndent >= 4) {
+ break;
+ }
+
+ columns = escapedSplit(lineText);
+ if (columns.length && columns[0] === "") {
+ columns.shift();
+ }
+ if (columns.length && columns[columns.length - 1] === "") {
+ columns.pop();
+ }
+
+ autocompletedCells += columnCount - columns.length;
+ if (autocompletedCells > MAX_AUTOCOMPLETED_CELLS) {
+ break;
+ }
+
+ if (nextLine === startLine + 2) {
+ const tBodyOpen = state.push("tbody_open", "tbody", 1);
+ tBodyLines = [startLine + 2, 0];
+ tBodyOpen.map = tBodyLines;
+ }
+
+ const rowOpen = state.push("tr_open", "tr", 1);
+ rowOpen.map = [nextLine, nextLine + 1];
+
+ for (let i = 0; i < columnCount; i++) {
+ const tdOpen = state.push("td_open", "td", 1);
+ if (aligns[i]) {
+ tdOpen.attrs = [["style", "text-align:" + aligns[i]]];
+ }
+ const inlineToken = state.push("inline", "", 0);
+ inlineToken.content = columns[i] ? columns[i].trim() : "";
+ inlineToken.children = [];
+ state.push("td_close", "td", -1);
+ }
+ state.push("tr_close", "tr", -1);
+ }
+
+ if (tBodyLines) {
+ state.push("tbody_close", "tbody", -1);
+ tBodyLines[1] = nextLine;
+ }
+
+ state.push("table_close", "table", -1);
+ tableLines[1] = nextLine;
+
+ state.parentType = oldParentType;
+ state.line = nextLine;
+ return true;
+}
+
export default function markdownTables(md: MarkdownIt): void {
+ // Replace the built-in GFM table rule with one that ignores pipes inside
+ // inline code and math spans when splitting cells. Without this, content
+ // such as `` `|a-b|` `` would silently split a row into extra cells and
+ // truncate the document on round-trip.
+ md.block.ruler.at("table", tableRule, { alt: ["paragraph", "reference"] });
+
// insert a new rule after the "inline" rules are parsed
md.core.ruler.after("inline", "tables-pm", (state) => {
const tokens = state.tokens;