fix: Pipes in math and code blocks within table cells

This commit is contained in:
Tom Moor
2026-05-28 21:53:06 -04:00
parent 3f92e96006
commit 8ded4eb995
3 changed files with 325 additions and 2 deletions
+117 -1
View File
@@ -6,7 +6,7 @@ import * as Y from "yjs";
import type { ProsemirrorData } from "@shared/types";
import { MentionType } from "@shared/types";
import { createContext } from "@server/context";
import { schema } from "@server/editor";
import { parser, schema, serializer } from "@server/editor";
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
import type { MentionAttrs } from "./ProsemirrorHelper";
import { ProsemirrorHelper } from "./ProsemirrorHelper";
@@ -1568,3 +1568,119 @@ describe("ProsemirrorHelper", () => {
});
});
});
// Exercises markdown tables whose cells contain pipes inside inline code spans
// or inline math: the cell contents must parse intact, and serializing the
// parsed document must be stable across a second parse/serialize pass.
describe("table markdown round trip", () => {
  /** Joins table rows into markdown source terminated by a trailing newline. */
  const tableSource = (...rows: string[]) => [...rows, ""].join("\n");

  /** Serializes the parsed markdown, then parses and serializes that output again. */
  const roundTrip = (md: string) => {
    const doc = parser.parse(md);
    expect(doc).not.toBeNull();
    const first = serializer.serialize(doc!);
    const reparsed = parser.parse(first)!;
    const second = serializer.serialize(reparsed);
    return { first, second };
  };

  /** Parses markdown whose first node is a table and returns its cell texts by row. */
  const getCellTexts = (md: string) => {
    const table = parser.parse(md)!.content.firstChild!;
    expect(table.type.name).toBe("table");
    const grid: string[][] = [];
    table.forEach((row) => {
      const texts: string[] = [];
      row.forEach((cell) => {
        texts.push(cell.textContent);
      });
      grid.push(texts);
    });
    return grid;
  };

  it("preserves a single inline code span containing pipes", () => {
    const source = tableSource("| A | B |", "| --- | --- |", "| x | `|y|` |");
    expect(getCellTexts(source)).toEqual([
      ["A", "B"],
      ["x", "|y|"],
    ]);
  });

  it("preserves multiple inline code spans with pipes in the same cell", () => {
    const source = tableSource(
      "| Condition | Facts |",
      "| --- | --- |",
      "| Absolute time difference | The system checks `|Clock_NTP_Camera1 - Clock_GPS_Camera1|` and `|Clock_NTP_Camera2 - Clock_GPS_Camera2|`. |"
    );
    const cells = getCellTexts(source);
    expect(cells).toHaveLength(2);
    expect(cells[1][0]).toBe("Absolute time difference");
    expect(cells[1][1]).toBe(
      "The system checks |Clock_NTP_Camera1 - Clock_GPS_Camera1| and |Clock_NTP_Camera2 - Clock_GPS_Camera2|."
    );
  });

  it("preserves inline math containing pipes", () => {
    const source = tableSource("| A | B |", "| --- | --- |", "| x | $|a-b|$ |");
    const cells = getCellTexts(source);
    expect(cells[1][0]).toBe("x");
    expect(cells[1][1]).toBe("|a-b|");
  });

  it("preserves identifiers with underscores and braces inside code spans", () => {
    const source = tableSource(
      "| Field | Value |",
      "| --- | --- |",
      "| ID | `foo_{bar}|baz_{qux}` |"
    );
    const cells = getCellTexts(source);
    expect(cells[1][1]).toBe("foo_{bar}|baz_{qux}");
  });

  it("re-serializes a table with code-span pipes idempotently", () => {
    const source = tableSource("| A | B |", "| --- | --- |", "| x | `|y|` |");
    const { first, second } = roundTrip(source);
    expect(second).toBe(first);
  });

  it("re-serializes a table with prose plus code-span pipes idempotently", () => {
    const source = tableSource(
      "| Condition | Facts |",
      "| --- | --- |",
      "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|`. |"
    );
    const { first, second } = roundTrip(source);
    expect(second).toBe(first);
  });

  it("re-serializes a table with inline math pipes idempotently", () => {
    const source = tableSource("| A | B |", "| --- | --- |", "| x | $|a-b|$ |");
    const { first, second } = roundTrip(source);
    expect(second).toBe(first);
  });

  it("still splits cells on unescaped pipes outside code spans", () => {
    const source = tableSource(
      "| A | B | C |",
      "| --- | --- | --- |",
      "| x | y | z |"
    );
    const cells = getCellTexts(source);
    expect(cells[1]).toEqual(["x", "y", "z"]);
  });

  it("does not escape pipes in code spans outside of tables", () => {
    const md = "Inline `a|b` code outside a table.";
    const doc = parser.parse(md)!;
    expect(doc.textContent).toBe("Inline a|b code outside a table.");
    const serialized = serializer.serialize(doc).trim();
    expect(serialized).toBe(md);
  });
});
+6 -1
View File
@@ -84,7 +84,12 @@ export default class Math extends Node {
/**
 * Serialize an inline math node to markdown as `$<content>$`.
 *
 * The node's text content is written exactly once between the two dollar
 * delimiters. When the serializer reports that it is inside a table
 * (`state.inTable`), every pipe in the content is backslash escaped so it is
 * not mistaken for a cell delimiter.
 *
 * @param state the markdown serializer state to write to.
 * @param node the math node being serialized.
 */
toMarkdown(state: MarkdownSerializerState, node: ProsemirrorNode) {
  state.write("$");
  // Pipes inside math would otherwise be mistaken for cell delimiters when
  // the math appears within a table, so escape them here. "$&" in the
  // replacement re-inserts the matched pipe after the backslash.
  const content = state.inTable
    ? node.textContent.replace(/\|/g, "\\$&")
    : node.textContent;
  // NOTE(review): the `false` argument presumably disables the serializer's
  // own markdown escaping of the text — confirm against the serializer API.
  state.text(content, false);
  state.write("$");
}
+202
View File
@@ -6,7 +6,209 @@ const BR_TAG_REGEX = /<br\s*\/?>/gi;
// Stops at <br> or newline to handle multiple checkboxes in a cell
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;
// Character-set check for a GFM table delimiter row such as "| --- | :--: |":
// the whole line may contain only whitespace, pipes, colons and dashes, and
// must include at least one dash. The pattern does not itself require a pipe;
// code using it checks for the presence of a pipe separately.
const DELIMITER_ROW_REGEX = /^[\s|:-]*-[\s|:-]*$/;
/**
 * Backslash-escape every pipe in `str` that is not already directly preceded
 * by a backslash, so the pipe is not treated as a table cell delimiter. Pipes
 * that already follow a backslash are kept as they are, which prevents double
 * escaping.
 *
 * @param str the text whose pipes should be escaped.
 * @returns a copy of `str` in which each previously unescaped pipe is "\|".
 */
function escapePipes(str: string): string {
  // Negative lookbehind: match a pipe only when the previous character is not
  // a backslash.
  const unescapedPipe = /(?<!\\)\|/g;
  const escapedPipe = "\\|";
  return str.replace(unescapedPipe, escapedPipe);
}
/**
 * Locate the closing fence of an inline code span: the first maximal run of
 * backticks at or after `from` whose length is exactly `length`. Runs that are
 * shorter or longer are skipped in their entirety.
 *
 * @param line the line to search within.
 * @param from the index at which scanning starts.
 * @param length the exact number of backticks the closing run must have.
 * @returns the start index of the matching run, or -1 when there is none.
 */
function findClosingFence(line: string, from: number, length: number): number {
  // A fresh global regex per call so that `lastIndex` can seed the scan start
  // without leaking state between calls.
  const backtickRuns = /`+/g;
  backtickRuns.lastIndex = from;
  for (
    let run = backtickRuns.exec(line);
    run !== null;
    run = backtickRuns.exec(line)
  ) {
    if (run[0].length === length) {
      return run.index;
    }
  }
  return -1;
}
/**
 * Given the index of an opening "$", find the "$" that closes the inline math
 * span on the same line. The checks are meant to keep currency-like text such
 * as "$5 | $10" from being read as math:
 *
 * - the opening "$" must be followed by a character that is not a space or tab;
 * - a candidate closing "$" is skipped when it is backslash escaped (an odd
 *   number of backslashes directly before it), when it is preceded by a space
 *   or tab, or when it is followed by an ASCII digit.
 *
 * @param line the line to search within.
 * @param openIdx the index of the opening "$" delimiter.
 * @returns the index of the closing "$", or -1 when no valid one exists.
 */
function findClosingMath(line: string, openIdx: number): number {
  const isBlank = (c: string | undefined): boolean => c === " " || c === "\t";

  const firstInner = line[openIdx + 1];
  if (firstInner === undefined || isBlank(firstInner)) {
    return -1;
  }

  // Candidates start two past the opener so the span has at least one
  // character of content.
  let candidate = line.indexOf("$", openIdx + 2);
  while (candidate !== -1) {
    // Walk left over the run of backslashes that directly precedes the "$".
    let runStart = candidate;
    while (runStart > 0 && line[runStart - 1] === "\\") {
      runStart--;
    }
    const isEscaped = (candidate - runStart) % 2 === 1;

    if (!isEscaped) {
      // charCodeAt yields NaN past the end of the line, which fails both
      // comparisons, so a "$" at the end of the line is never "before a digit".
      const following = line.charCodeAt(candidate + 1);
      const beforeDigit = following >= 48 && following <= 57;
      if (!isBlank(line[candidate - 1]) && !beforeDigit) {
        return candidate;
      }
    }

    candidate = line.indexOf("$", candidate + 1);
  }
  return -1;
}
/**
 * Rewrite a single table row so that pipes located inside inline code spans
 * (backtick delimited) or inline math (`$...$`) are backslash escaped, while
 * pipes outside such spans — the actual cell delimiters — are copied through
 * unchanged.
 *
 * @param line the table row to process.
 * @returns the row with pipes inside code and math spans escaped.
 */
function escapeSpanPipesInRow(line: string): string {
  const pieces: string[] = [];
  let pos = 0;

  while (pos < line.length) {
    const current = line[pos];

    if (current === "\\") {
      // Copy a backslash together with the character it escapes, so an escaped
      // backtick or dollar sign cannot open a span.
      pieces.push(line.slice(pos, pos + 2));
      pos += 2;
    } else if (current === "`") {
      // Measure the opening backtick run; the span closes at a run of the
      // same length.
      let fenceEnd = pos + 1;
      while (fenceEnd < line.length && line[fenceEnd] === "`") {
        fenceEnd++;
      }
      const ticks = line.slice(pos, fenceEnd);
      const closing = findClosingFence(line, fenceEnd, ticks.length);
      if (closing === -1) {
        // Unterminated run: emit it literally and keep scanning after it.
        pieces.push(ticks);
        pos = fenceEnd;
      } else {
        pieces.push(ticks, escapePipes(line.slice(fenceEnd, closing)), ticks);
        pos = closing + ticks.length;
      }
    } else if (current === "$") {
      const closing = findClosingMath(line, pos);
      if (closing === -1) {
        // Not a valid math opener: treat the dollar sign as plain text.
        pieces.push(current);
        pos++;
      } else {
        pieces.push("$", escapePipes(line.slice(pos + 1, closing)), "$");
        pos = closing + 1;
      }
    } else {
      pieces.push(current);
      pos++;
    }
  }

  return pieces.join("");
}
/** Describes the fence that opened the fenced code block currently being skipped. */
interface TableScanCodeFence {
  /** The fence character, either a backtick or a tilde. */
  marker: string;
  /** The number of fence characters in the opening run. */
  length: number;
}

/**
 * Detect a line that opens a fenced code block: optional leading whitespace
 * followed by a run of three or more backticks or tildes. A backtick run whose
 * remainder contains another backtick is not a fence (it is an inline code
 * span such as "```a|b``` | x"), matching the CommonMark info-string rule.
 *
 * This is a deliberately lenient heuristic: it does not enforce the 0–3 space
 * indentation limit and does not look through blockquote markers.
 *
 * @param line the line to inspect.
 * @returns the fence description, or undefined if the line opens no fence.
 */
function matchOpeningCodeFence(line: string): TableScanCodeFence | undefined {
  const match = /^[ \t]*(`{3,}|~{3,})/.exec(line);
  if (!match) {
    return undefined;
  }
  const run = match[1] ?? "";
  const marker = run.charAt(0);
  const rest = line.slice(match[0].length);
  if (marker === "`" && rest.includes("`")) {
    return undefined;
  }
  return { marker, length: run.length };
}

/**
 * Check whether a line closes the given fenced code block: once surrounding
 * whitespace is removed it must consist solely of the fence character and be
 * at least as long as the opening run.
 *
 * @param line the line to inspect.
 * @param fence the fence that opened the current code block.
 * @returns true if the line closes the fence.
 */
function isClosingCodeFence(line: string, fence: TableScanCodeFence): boolean {
  const trimmed = line.trim();
  if (trimmed.length < fence.length) {
    return false;
  }
  for (const ch of trimmed) {
    if (ch !== fence.marker) {
      return false;
    }
  }
  return true;
}

/**
 * Escape pipe characters inside inline code spans and math within GFM tables.
 *
 * markdown-it's table block rule splits cells on the pipe character and is
 * unaware of inline code spans and math, so an unescaped pipe inside `` `...` ``
 * or `$...$` incorrectly fragments — or even truncates — the cell. Escaping
 * those pipes before block tokenization lets the table split into the right
 * cells; the table parsing is then expected to drop the added backslash again
 * so the code/math content is preserved.
 *
 * A table is recognised as a line containing a pipe that is immediately
 * followed by a delimiter row that also contains a pipe. The header and every
 * following line are processed until a blank line — or the opening of a fenced
 * code block — ends the table. Lines inside fenced code blocks are never
 * modified, so table-looking samples within code blocks keep their literal
 * content.
 *
 * @param src the full markdown source.
 * @returns the source with span-internal pipes within tables escaped.
 */
export function escapeTableSpanPipes(src: string): string {
  // Fast path: without any pipe there cannot be a table to process.
  if (!src.includes("|")) {
    return src;
  }

  const lines = src.split("\n");
  let openFence: TableScanCodeFence | undefined;
  let inTableBody = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Inside a fenced code block everything is literal; only look for the
    // closing fence.
    if (openFence) {
      if (isClosingCodeFence(line, openFence)) {
        openFence = undefined;
      }
      continue;
    }

    // A fence opener starts a code block and also terminates any table that
    // directly precedes it.
    const fence = matchOpeningCodeFence(line);
    if (fence) {
      openFence = fence;
      inTableBody = false;
      continue;
    }

    if (inTableBody) {
      if (line.trim() === "") {
        // A blank line terminates the table.
        inTableBody = false;
      } else {
        lines[i] = escapeSpanPipesInRow(line);
      }
      continue;
    }

    // A table begins with a header row containing a pipe immediately followed
    // by a delimiter row that itself contains a pipe.
    if (i + 1 >= lines.length) {
      continue;
    }
    const delimiter = lines[i + 1];
    if (
      !line.includes("|") ||
      !delimiter.includes("|") ||
      !DELIMITER_ROW_REGEX.test(delimiter.trim())
    ) {
      continue;
    }

    lines[i] = escapeSpanPipesInRow(line);
    inTableBody = true;
    // Skip the delimiter row itself: it never contains spans to escape.
    i++;
  }

  return lines.join("\n");
}
export default function markdownTables(md: MarkdownIt): void {
// Escape pipes inside code/math spans before the block table rule splits
// cells, so that those pipes are not mistaken for cell delimiters.
md.core.ruler.before("block", "tables-pm-escape", (state) => {
state.src = escapeTableSpanPipes(state.src);
return false;
});
// insert a new rule after the "inline" rules are parsed
md.core.ruler.after("inline", "tables-pm", (state) => {
const tokens = state.tokens;