mirror of
https://github.com/outline/outline.git
synced 2026-06-13 03:14:59 +03:00
fix: Pipes in math and code blocks within table cells
This commit is contained in:
@@ -6,7 +6,7 @@ import * as Y from "yjs";
|
||||
import type { ProsemirrorData } from "@shared/types";
|
||||
import { MentionType } from "@shared/types";
|
||||
import { createContext } from "@server/context";
|
||||
import { schema } from "@server/editor";
|
||||
import { parser, schema, serializer } from "@server/editor";
|
||||
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
|
||||
import type { MentionAttrs } from "./ProsemirrorHelper";
|
||||
import { ProsemirrorHelper } from "./ProsemirrorHelper";
|
||||
@@ -1568,3 +1568,119 @@ describe("ProsemirrorHelper", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("table markdown round trip", () => {
  /**
   * Parse markdown, serialize the resulting document, then parse and
   * serialize that output a second time.
   *
   * @param md the markdown source to round trip.
   * @returns the first and second serialization, for idempotency checks.
   */
  const roundTrip = (md: string) => {
    const doc = parser.parse(md);
    expect(doc).not.toBeNull();
    const first = serializer.serialize(doc!);
    const second = serializer.serialize(parser.parse(first)!);
    return { first, second };
  };

  /**
   * Parse markdown whose first top-level node is expected to be a table and
   * collect the text content of every cell.
   *
   * @param md the markdown source containing a table.
   * @returns one array of cell texts per table row, in document order.
   */
  const getCellTexts = (md: string) => {
    const doc = parser.parse(md)!;
    const table = doc.content.firstChild!;
    expect(table.type.name).toBe("table");
    const rows: string[][] = [];
    table.forEach((row) => {
      const cells: string[] = [];
      row.forEach((cell) => cells.push(cell.textContent));
      rows.push(cells);
    });
    return rows;
  };

  it("preserves a single inline code span containing pipes", () => {
    const cells = getCellTexts(
      ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
    );

    expect(cells).toEqual([
      ["A", "B"],
      ["x", "|y|"],
    ]);
  });

  it("preserves multiple inline code spans with pipes in the same cell", () => {
    const md = [
      "| Condition | Facts |",
      "| --- | --- |",
      "| Absolute time difference | The system checks `|Clock_NTP_Camera1 - Clock_GPS_Camera1|` and `|Clock_NTP_Camera2 - Clock_GPS_Camera2|`. |",
      "",
    ].join("\n");

    const cells = getCellTexts(md);
    expect(cells).toHaveLength(2);
    expect(cells[1][0]).toBe("Absolute time difference");
    expect(cells[1][1]).toBe(
      "The system checks |Clock_NTP_Camera1 - Clock_GPS_Camera1| and |Clock_NTP_Camera2 - Clock_GPS_Camera2|."
    );
  });

  it("preserves inline math containing pipes", () => {
    const cells = getCellTexts(
      ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
    );

    expect(cells[1][0]).toBe("x");
    expect(cells[1][1]).toBe("|a-b|");
  });

  it("preserves identifiers with underscores and braces inside code spans", () => {
    const cells = getCellTexts(
      [
        "| Field | Value |",
        "| --- | --- |",
        "| ID | `foo_{bar}|baz_{qux}` |",
        "",
      ].join("\n")
    );

    expect(cells[1][1]).toBe("foo_{bar}|baz_{qux}");
  });

  it("re-serializes a table with code-span pipes idempotently", () => {
    const { first, second } = roundTrip(
      ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
    );

    expect(second).toBe(first);
  });

  it("re-serializes a table with prose plus code-span pipes idempotently", () => {
    const { first, second } = roundTrip(
      [
        "| Condition | Facts |",
        "| --- | --- |",
        "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|`. |",
        "",
      ].join("\n")
    );

    expect(second).toBe(first);
  });

  it("re-serializes a table with inline math pipes idempotently", () => {
    const { first, second } = roundTrip(
      ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
    );

    expect(second).toBe(first);
  });

  it("still splits cells on unescaped pipes outside code spans", () => {
    const cells = getCellTexts(
      ["| A | B | C |", "| --- | --- | --- |", "| x | y | z |", ""].join("\n")
    );

    expect(cells[1]).toEqual(["x", "y", "z"]);
  });

  it("does not escape pipes in code spans outside of tables", () => {
    const md = "Inline `a|b` code outside a table.";
    const doc = parser.parse(md)!;
    expect(doc.textContent).toBe("Inline a|b code outside a table.");
    expect(serializer.serialize(doc).trim()).toBe(md);
  });
});
|
||||
|
||||
@@ -84,7 +84,12 @@ export default class Math extends Node {
|
||||
|
||||
/**
 * Serialize this math node to markdown, wrapping its text content in "$"
 * delimiters.
 *
 * @param state the markdown serializer state to write into.
 * @param node the math node being serialized.
 */
toMarkdown(state: MarkdownSerializerState, node: ProsemirrorNode) {
  state.write("$");
  // Pipes inside math would otherwise be mistaken for cell delimiters when
  // the math appears within a table, so escape them here.
  const content = state.inTable
    ? node.textContent.replace(/\|/g, "\\$&")
    : node.textContent;
  // The content is written exactly once; the second argument is presumably
  // the serializer's "escape" flag, disabled so the math is emitted verbatim
  // (NOTE(review): confirm against MarkdownSerializerState.text).
  state.text(content, false);
  state.write("$");
}
|
||||
|
||||
|
||||
@@ -6,7 +6,209 @@ const BR_TAG_REGEX = /<br\s*\/?>/gi;
|
||||
// Matches a checkbox at the start of a string: an optional leading "-", a
// bracketed marker ("x"/"X", whitespace, "_" or "-"), one whitespace character
// and then the label. The label capture stops at the first "<" (e.g. a <br>)
// or newline so that multiple checkboxes in a cell can be handled one by one.
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;
|
||||
|
||||
// A GFM table delimiter row consists only of pipes, dashes, colons and
// whitespace, e.g. "| --- | :--: |". This pattern enforces that character set
// and the presence of at least one dash; it does NOT require a pipe, so
// callers must check for one separately.
const DELIMITER_ROW_REGEX = /^[\s|:-]*-[\s|:-]*$/;
|
||||
|
||||
/**
 * Escape unescaped pipe characters within a single line so that they survive
 * markdown-it's GFM table cell splitting. A pipe whose immediately preceding
 * character is a backslash is left untouched to avoid double escaping.
 *
 * @param str the string to escape pipes within.
 * @returns the string with unescaped pipes backslash escaped.
 */
function escapePipes(str: string): string {
  // The negative lookbehind only inspects the single preceding character.
  return str.replace(/(?<!\\)\|/g, "\\|");
}
|
||||
|
||||
/**
 * Find the index of a closing inline code fence — a run of backticks of exactly
 * `length` — starting the search at `from`. Runs that are shorter or longer
 * than `length` are skipped as a whole.
 *
 * @param line the line to search within.
 * @param from the index to begin searching from.
 * @param length the exact backtick run length to match.
 * @returns the index of the first backtick of the closing fence, or -1 if none
 * is found.
 */
function findClosingFence(line: string, from: number, length: number): number {
  let i = from;
  while (i < line.length) {
    if (line[i] === "`") {
      // Measure the full run of consecutive backticks starting at i.
      let j = i;
      while (j < line.length && line[j] === "`") {
        j++;
      }
      if (j - i === length) {
        return i;
      }
      // Wrong length: resume scanning after the entire run.
      i = j;
    } else {
      i++;
    }
  }
  return -1;
}
|
||||
|
||||
/**
 * Find the index of a valid closing inline math delimiter ("$") for an opening
 * delimiter at `openIdx`, mirroring the open/close validity checks of the inline
 * math rule so that currency-like text ("$5 | $10") is not treated as math.
 *
 * @param line the line to search within.
 * @param openIdx the index of the opening "$" delimiter.
 * @returns the index of the closing delimiter, or -1 if there is none.
 */
function findClosingMath(line: string, openIdx: number): number {
  const next = line[openIdx + 1];
  // Opening delimiter cannot be followed by whitespace, and must have content.
  if (next === undefined || next === " " || next === "\t") {
    return -1;
  }

  // Start two past the opener so the math body holds at least one character.
  for (let i = openIdx + 2; i < line.length; i++) {
    if (line[i] !== "$") {
      continue;
    }

    // Count preceding backslashes — an odd count means the "$" is escaped.
    let backslashes = 0;
    for (let k = i - 1; k >= 0 && line[k] === "\\"; k--) {
      backslashes++;
    }
    if (backslashes % 2 !== 0) {
      continue;
    }

    // Closing delimiter cannot be preceded by whitespace or followed by a digit.
    const prev = line[i - 1];
    const after = line[i + 1];
    const prevIsSpace = prev === " " || prev === "\t";
    const afterIsDigit = after !== undefined && after >= "0" && after <= "9";
    if (!prevIsSpace && !afterIsDigit) {
      return i;
    }
  }

  return -1;
}
|
||||
|
||||
/**
 * Escape pipe characters that appear inside inline code spans (`` `...` ``) and
 * inline math (`$...$`) within a single table row. The surrounding cell
 * delimiters are left untouched so the row still splits into the correct cells.
 *
 * @param line the table row to process.
 * @returns the row with span-internal pipes escaped.
 */
function escapeSpanPipesInRow(line: string): string {
  let result = "";
  let i = 0;

  while (i < line.length) {
    const ch = line[i];

    // Preserve existing backslash escapes verbatim. Copying the escaped
    // character along with the backslash also keeps an escaped "`" or "$"
    // from being treated as a span opener.
    if (ch === "\\") {
      result += line.slice(i, i + 2);
      i += 2;
      continue;
    }

    // Inline code span, delimited by an equal length run of backticks.
    if (ch === "`") {
      let runEnd = i;
      while (runEnd < line.length && line[runEnd] === "`") {
        runEnd++;
      }
      const fence = line.slice(i, runEnd);
      const closeIdx = findClosingFence(line, runEnd, fence.length);
      if (closeIdx === -1) {
        // Unterminated span: emit the backticks as-is and keep scanning.
        result += fence;
        i = runEnd;
        continue;
      }
      result += fence + escapePipes(line.slice(runEnd, closeIdx)) + fence;
      i = closeIdx + fence.length;
      continue;
    }

    // Inline math, delimited by single dollar signs.
    if (ch === "$") {
      const closeIdx = findClosingMath(line, i);
      if (closeIdx === -1) {
        // Not a valid math opener (e.g. currency): emit the "$" literally.
        result += ch;
        i++;
        continue;
      }
      result += "$" + escapePipes(line.slice(i + 1, closeIdx)) + "$";
      i = closeIdx + 1;
      continue;
    }

    result += ch;
    i++;
  }

  return result;
}
|
||||
|
||||
// A fenced code block marker: up to three spaces of indentation followed by a
// run of at least three backticks or tildes, then an optional info string.
const CODE_FENCE_REGEX = /^ {0,3}(`{3,}|~{3,})([^\n]*)$/;

/** The parsed form of a fenced code block marker line. */
interface CodeFence {
  /** The fence character, either "`" or "~". */
  char: string;
  /** The number of fence characters in the run. */
  length: number;
  /** The trimmed text following the run (empty for a closing fence). */
  info: string;
}

/**
 * Parse a line as a fenced code block marker.
 *
 * @param line the line to inspect.
 * @returns the parsed fence, or undefined if the line is not a fence marker.
 */
function parseCodeFence(line: string): CodeFence | undefined {
  const match = CODE_FENCE_REGEX.exec(line);
  if (!match) {
    return undefined;
  }
  const [, marker = "", rest = ""] = match;
  const char = marker.charAt(0);
  const info = rest.trim();
  // A backtick fence may not have backticks in its info string; such a line
  // is inline code (e.g. "```a|b``` | c"), not a fence.
  if (char === "`" && info.includes("`")) {
    return undefined;
  }
  return { char, length: marker.length, info };
}

/**
 * Escape pipe characters inside inline code spans and math within GFM tables.
 *
 * markdown-it's table block rule splits cells on the pipe character and is
 * unaware of inline code spans and math, so an unescaped pipe inside `` `...` ``
 * or `$...$` incorrectly fragments — or even truncates — the cell. Escaping
 * those pipes before block tokenization lets the table split into the right
 * cells; the backslash is stripped again when each cell's content is re-parsed
 * inline, so the code/math content is preserved exactly.
 *
 * Lines inside top-level fenced code blocks are never modified, and a fence
 * opener ends the table being scanned, so literal code that merely looks like
 * a table (or directly follows one) does not gain stray backslashes. Fences
 * nested in other containers and indented code blocks are not detected.
 *
 * @param src the full markdown source.
 * @returns the source with span-internal pipes within tables escaped.
 */
export function escapeTableSpanPipes(src: string): string {
  if (!src.includes("|")) {
    return src;
  }

  const lines = src.split("\n");
  // The fence that opened the code block we are currently inside, if any.
  let openFence: CodeFence | undefined;

  for (let i = 0; i < lines.length - 1; i++) {
    const header = lines[i];
    const fence = parseCodeFence(header);

    if (openFence) {
      // Only a bare fence of the same character and at least the same length
      // closes the block; everything else inside is literal code.
      if (
        fence &&
        fence.char === openFence.char &&
        fence.length >= openFence.length &&
        fence.info === ""
      ) {
        openFence = undefined;
      }
      continue;
    }

    if (fence) {
      openFence = fence;
      continue;
    }

    const delimiter = lines[i + 1];

    // A table begins with a header row containing a pipe immediately followed
    // by a delimiter row that itself contains a pipe.
    if (
      !header.includes("|") ||
      !delimiter.includes("|") ||
      !DELIMITER_ROW_REGEX.test(delimiter.trim())
    ) {
      continue;
    }

    // Escape the header and every body row until a blank line or a code fence
    // terminates the table. The delimiter row itself never contains spans to
    // escape.
    lines[i] = escapeSpanPipesInRow(header);
    let j = i + 2;
    while (
      j < lines.length &&
      lines[j].trim() !== "" &&
      !parseCodeFence(lines[j])
    ) {
      lines[j] = escapeSpanPipesInRow(lines[j]);
      j++;
    }
    // Resume at the terminating line so a fence opener is seen by the
    // fence tracking above on the next iteration.
    i = j - 1;
  }

  return lines.join("\n");
}
|
||||
|
||||
export default function markdownTables(md: MarkdownIt): void {
|
||||
// Escape pipes inside code/math spans before the block table rule splits
|
||||
// cells, so that those pipes are not mistaken for cell delimiters.
|
||||
md.core.ruler.before("block", "tables-pm-escape", (state) => {
|
||||
state.src = escapeTableSpanPipes(state.src);
|
||||
return false;
|
||||
});
|
||||
|
||||
// insert a new rule after the "inline" rules are parsed
|
||||
md.core.ruler.after("inline", "tables-pm", (state) => {
|
||||
const tokens = state.tokens;
|
||||
|
||||
Reference in New Issue
Block a user