From 8ded4eb995ceaf048afb77b9e7b53c1b8c54d024 Mon Sep 17 00:00:00 2001 From: Tom Moor Date: Thu, 28 May 2026 21:53:06 -0400 Subject: [PATCH] fix: Pipes in math and code blocks within table cells --- .../models/helpers/ProseMirrorHelper.test.ts | 118 +++++++++- shared/editor/nodes/Math.ts | 7 +- shared/editor/rules/tables.ts | 202 ++++++++++++++++++ 3 files changed, 325 insertions(+), 2 deletions(-) diff --git a/server/models/helpers/ProseMirrorHelper.test.ts b/server/models/helpers/ProseMirrorHelper.test.ts index 445f56c6e2..bc17d5f2be 100644 --- a/server/models/helpers/ProseMirrorHelper.test.ts +++ b/server/models/helpers/ProseMirrorHelper.test.ts @@ -6,7 +6,7 @@ import * as Y from "yjs"; import type { ProsemirrorData } from "@shared/types"; import { MentionType } from "@shared/types"; import { createContext } from "@server/context"; -import { schema } from "@server/editor"; +import { parser, schema, serializer } from "@server/editor"; import { buildProseMirrorDoc, buildUser } from "@server/test/factories"; import type { MentionAttrs } from "./ProsemirrorHelper"; import { ProsemirrorHelper } from "./ProsemirrorHelper"; @@ -1568,3 +1568,119 @@ describe("ProsemirrorHelper", () => { }); }); }); + +describe("table markdown round trip", () => { + const roundTrip = (md: string) => { + const doc = parser.parse(md); + expect(doc).not.toBeNull(); + const first = serializer.serialize(doc!); + const second = serializer.serialize(parser.parse(first)!); + return { first, second }; + }; + + const getCellTexts = (md: string) => { + const doc = parser.parse(md)!; + const table = doc.content.firstChild!; + expect(table.type.name).toBe("table"); + const rows: string[][] = []; + table.forEach((row) => { + const cells: string[] = []; + row.forEach((cell) => cells.push(cell.textContent)); + rows.push(cells); + }); + return rows; + }; + + it("preserves a single inline code span containing pipes", () => { + const cells = getCellTexts( + ["| A | B |", "| --- | --- |", "| x | `|y|` |", 
""].join("\n") + ); + + expect(cells).toEqual([ + ["A", "B"], + ["x", "|y|"], + ]); + }); + + it("preserves multiple inline code spans with pipes in the same cell", () => { + const md = [ + "| Condition | Facts |", + "| --- | --- |", + "| Absolute time difference | The system checks `|Clock_NTP_Camera1 - Clock_GPS_Camera1|` and `|Clock_NTP_Camera2 - Clock_GPS_Camera2|`. |", + "", + ].join("\n"); + + const cells = getCellTexts(md); + expect(cells).toHaveLength(2); + expect(cells[1][0]).toBe("Absolute time difference"); + expect(cells[1][1]).toBe( + "The system checks |Clock_NTP_Camera1 - Clock_GPS_Camera1| and |Clock_NTP_Camera2 - Clock_GPS_Camera2|." + ); + }); + + it("preserves inline math containing pipes", () => { + const cells = getCellTexts( + ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n") + ); + + expect(cells[1][0]).toBe("x"); + expect(cells[1][1]).toBe("|a-b|"); + }); + + it("preserves identifiers with underscores and braces inside code spans", () => { + const cells = getCellTexts( + [ + "| Field | Value |", + "| --- | --- |", + "| ID | `foo_{bar}|baz_{qux}` |", + "", + ].join("\n") + ); + + expect(cells[1][1]).toBe("foo_{bar}|baz_{qux}"); + }); + + it("re-serializes a table with code-span pipes idempotently", () => { + const { first, second } = roundTrip( + ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n") + ); + + expect(second).toBe(first); + }); + + it("re-serializes a table with prose plus code-span pipes idempotently", () => { + const { first, second } = roundTrip( + [ + "| Condition | Facts |", + "| --- | --- |", + "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|`. 
|", + "", + ].join("\n") + ); + + expect(second).toBe(first); + }); + + it("re-serializes a table with inline math pipes idempotently", () => { + const { first, second } = roundTrip( + ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n") + ); + + expect(second).toBe(first); + }); + + it("still splits cells on unescaped pipes outside code spans", () => { + const cells = getCellTexts( + ["| A | B | C |", "| --- | --- | --- |", "| x | y | z |", ""].join("\n") + ); + + expect(cells[1]).toEqual(["x", "y", "z"]); + }); + + it("does not escape pipes in code spans outside of tables", () => { + const md = "Inline `a|b` code outside a table."; + const doc = parser.parse(md)!; + expect(doc.textContent).toBe("Inline a|b code outside a table."); + expect(serializer.serialize(doc).trim()).toBe(md); + }); +}); diff --git a/shared/editor/nodes/Math.ts b/shared/editor/nodes/Math.ts index ea54419228..6d08d722fd 100644 --- a/shared/editor/nodes/Math.ts +++ b/shared/editor/nodes/Math.ts @@ -84,7 +84,12 @@ export default class Math extends Node { toMarkdown(state: MarkdownSerializerState, node: ProsemirrorNode) { state.write("$"); - state.text(node.textContent, false); + // Inside a table, a pipe in math would be read as a cell delimiter, so every + // pipe is prefixed with a backslash there; elsewhere the text is written as is. + const content = state.inTable + ? node.textContent.replace(/\|/g, "\\$&") + : node.textContent; + state.text(content, false); state.write("$"); } diff --git a/shared/editor/rules/tables.ts b/shared/editor/rules/tables.ts index ffaaf4def2..3135471cd4 100644 --- a/shared/editor/rules/tables.ts +++ b/shared/editor/rules/tables.ts @@ -6,7 +6,209 @@ const BR_TAG_REGEX = //gi; // Stops at
or newline to handle multiple checkboxes in a cell const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i; +// A GFM table delimiter row holds only pipes, dashes, colons and whitespace. This +// pattern requires one dash; callers check for a pipe, e.g. "| --- | :--: |". +const DELIMITER_ROW_REGEX = /^[\s|:-]*-[\s|:-]*$/; + +/** + * Escape unescaped pipe characters within a single line so that they survive + * markdown-it's GFM table cell splitting. Pipes already preceded by a backslash + * are left untouched to avoid double escaping. + * + * @param str the string to escape pipes within. + * @returns the string with unescaped pipes backslash escaped. + */ +function escapePipes(str: string): string { + return str.replace(/(?= 0 && line[k] === "\\"; k--) { + backslashes++; + } + if (backslashes % 2 !== 0) { + continue; + } + + // Closing delimiter cannot be preceded by whitespace or followed by a digit. + const prev = line[i - 1]; + const after = line[i + 1]; + const prevIsSpace = prev === " " || prev === "\t"; + const afterIsDigit = after !== undefined && after >= "0" && after <= "9"; + if (!prevIsSpace && !afterIsDigit) { + return i; + } + } + + return -1; +} + +/** + * Escape pipe characters that appear inside inline code spans (`` `...` ``) and + * inline math (`$...$`) within a single table row. The surrounding cell + * delimiters are left untouched so the row still splits into the correct cells. + * + * @param line the table row to process. + * @returns the row with span-internal pipes escaped. + */ +function escapeSpanPipesInRow(line: string): string { + let result = ""; + let i = 0; + + while (i < line.length) { + const ch = line[i]; + + // Copy a backslash and the character after it verbatim, so an escaped backtick or dollar never opens a span. + if (ch === "\\") { + result += line.slice(i, i + 2); + i += 2; + continue; + } + + // Inline code span, delimited by an equal length run of backticks. 
+ if (ch === "`") { + let runEnd = i; + while (runEnd < line.length && line[runEnd] === "`") { + runEnd++; + } + const fence = line.slice(i, runEnd); + const closeIdx = findClosingFence(line, runEnd, fence.length); + if (closeIdx === -1) { + result += fence; + i = runEnd; + continue; + } + result += fence + escapePipes(line.slice(runEnd, closeIdx)) + fence; + i = closeIdx + fence.length; + continue; + } + + // Inline math, delimited by single dollar signs. + if (ch === "$") { + const closeIdx = findClosingMath(line, i); + if (closeIdx === -1) { + result += ch; + i++; + continue; + } + result += "$" + escapePipes(line.slice(i + 1, closeIdx)) + "$"; + i = closeIdx + 1; + continue; + } + + result += ch; + i++; + } + + return result; +} + +/** + * Escape pipe characters inside inline code spans and math within GFM tables. + * + * markdown-it's table block rule splits cells on the pipe character and is + * unaware of inline code spans and math, so an unescaped pipe inside `` `...` `` + * or `$...$` incorrectly fragments — or even truncates — the cell. Escaping + * those pipes before block tokenization lets the table split into the right + * cells; the backslash is stripped again when each cell's content is re-parsed + * inline, so the code/math content is preserved exactly. + * + * @param src the full markdown source. + * @returns the source with span-internal pipes within tables escaped. + */ +export function escapeTableSpanPipes(src: string): string { + if (src.indexOf("|") === -1) { + return src; + } + + const lines = src.split("\n"); + + for (let i = 0; i < lines.length - 1; i++) { + const header = lines[i]; + const delimiter = lines[i + 1]; + + // A table begins with a header row containing a pipe immediately followed + // by a delimiter row that itself contains a pipe. 
+ if ( + header.indexOf("|") === -1 || + delimiter.indexOf("|") === -1 || + !DELIMITER_ROW_REGEX.test(delimiter.trim()) + ) { + continue; + } + + // Escape the header and every following row up to the next blank line; the + // delimiter row is skipped. NOTE(review): rows inside fenced code blocks are rewritten too — confirm this is intended. + lines[i] = escapeSpanPipesInRow(header); + let j = i + 2; + while (j < lines.length && lines[j].trim() !== "") { + lines[j] = escapeSpanPipesInRow(lines[j]); + j++; + } + i = j - 1; + } + + return lines.join("\n"); +} + export default function markdownTables(md: MarkdownIt): void { + // Escape pipes inside code/math spans before the block table rule splits + // cells, so that those pipes are not mistaken for cell delimiters. + md.core.ruler.before("block", "tables-pm-escape", (state) => { + state.src = escapeTableSpanPipes(state.src); + return false; + }); + // insert a new rule after the "inline" rules are parsed md.core.ruler.after("inline", "tables-pm", (state) => { const tokens = state.tokens;