mirror of
https://github.com/outline/outline.git
synced 2026-06-13 11:25:03 +03:00
fix: Pipes in math and code blocks within table cells
This commit is contained in:
@@ -6,7 +6,7 @@ import * as Y from "yjs";
|
|||||||
import type { ProsemirrorData } from "@shared/types";
|
import type { ProsemirrorData } from "@shared/types";
|
||||||
import { MentionType } from "@shared/types";
|
import { MentionType } from "@shared/types";
|
||||||
import { createContext } from "@server/context";
|
import { createContext } from "@server/context";
|
||||||
import { schema } from "@server/editor";
|
import { parser, schema, serializer } from "@server/editor";
|
||||||
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
|
import { buildProseMirrorDoc, buildUser } from "@server/test/factories";
|
||||||
import type { MentionAttrs } from "./ProsemirrorHelper";
|
import type { MentionAttrs } from "./ProsemirrorHelper";
|
||||||
import { ProsemirrorHelper } from "./ProsemirrorHelper";
|
import { ProsemirrorHelper } from "./ProsemirrorHelper";
|
||||||
@@ -1568,3 +1568,119 @@ describe("ProsemirrorHelper", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("table markdown round trip", () => {
  /**
   * Parse markdown whose first block is a table and collect the text content
   * of every cell, grouped by row (the header row comes first).
   *
   * @param md the markdown source to parse.
   * @returns one array of cell texts per table row.
   */
  const getCellTexts = (md: string) => {
    const doc = parser.parse(md)!;
    const table = doc.content.firstChild!;
    expect(table.type.name).toBe("table");

    const rows: string[][] = [];
    table.forEach((row) => {
      const cells: string[] = [];
      row.forEach((cell) => cells.push(cell.textContent));
      rows.push(cells);
    });
    return rows;
  };

  /**
   * Parse and serialize the markdown twice so callers can compare the two
   * serializations for idempotence.
   *
   * @param md the markdown source to round trip.
   * @returns the first and second serialization.
   */
  const roundTrip = (md: string) => {
    const doc = parser.parse(md);
    expect(doc).not.toBeNull();

    const first = serializer.serialize(doc!);
    const second = serializer.serialize(parser.parse(first)!);

    // Idempotence alone would still hold if the first serialization dropped
    // or mangled cell content, so also require that re-parsing the serialized
    // output yields the same cell texts as the original input.
    expect(getCellTexts(first)).toEqual(getCellTexts(md));

    return { first, second };
  };

  it("preserves a single inline code span containing pipes", () => {
    const cells = getCellTexts(
      ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
    );

    expect(cells).toEqual([
      ["A", "B"],
      ["x", "|y|"],
    ]);
  });

  it("preserves multiple inline code spans with pipes in the same cell", () => {
    const md = [
      "| Condition | Facts |",
      "| --- | --- |",
      "| Absolute time difference | The system checks `|Clock_NTP_Camera1 - Clock_GPS_Camera1|` and `|Clock_NTP_Camera2 - Clock_GPS_Camera2|`. |",
      "",
    ].join("\n");

    const cells = getCellTexts(md);
    expect(cells).toHaveLength(2);
    expect(cells[1][0]).toBe("Absolute time difference");
    expect(cells[1][1]).toBe(
      "The system checks |Clock_NTP_Camera1 - Clock_GPS_Camera1| and |Clock_NTP_Camera2 - Clock_GPS_Camera2|."
    );
  });

  it("preserves inline math containing pipes", () => {
    const cells = getCellTexts(
      ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
    );

    expect(cells[1][0]).toBe("x");
    expect(cells[1][1]).toBe("|a-b|");
  });

  it("preserves identifiers with underscores and braces inside code spans", () => {
    const cells = getCellTexts(
      [
        "| Field | Value |",
        "| --- | --- |",
        "| ID | `foo_{bar}|baz_{qux}` |",
        "",
      ].join("\n")
    );

    expect(cells[1][1]).toBe("foo_{bar}|baz_{qux}");
  });

  it("re-serializes a table with code-span pipes idempotently", () => {
    const { first, second } = roundTrip(
      ["| A | B |", "| --- | --- |", "| x | `|y|` |", ""].join("\n")
    );

    expect(second).toBe(first);
  });

  it("re-serializes a table with prose plus code-span pipes idempotently", () => {
    const { first, second } = roundTrip(
      [
        "| Condition | Facts |",
        "| --- | --- |",
        "| Absolute time difference | The system checks `|Clock_NTP - Clock_GPS|`. |",
        "",
      ].join("\n")
    );

    expect(second).toBe(first);
  });

  it("re-serializes a table with inline math pipes idempotently", () => {
    const { first, second } = roundTrip(
      ["| A | B |", "| --- | --- |", "| x | $|a-b|$ |", ""].join("\n")
    );

    expect(second).toBe(first);
  });

  it("still splits cells on unescaped pipes outside code spans", () => {
    const cells = getCellTexts(
      ["| A | B | C |", "| --- | --- | --- |", "| x | y | z |", ""].join("\n")
    );

    expect(cells[1]).toEqual(["x", "y", "z"]);
  });

  it("does not escape pipes in code spans outside of tables", () => {
    const md = "Inline `a|b` code outside a table.";
    const doc = parser.parse(md)!;
    expect(doc.textContent).toBe("Inline a|b code outside a table.");
    expect(serializer.serialize(doc).trim()).toBe(md);
  });
});
|
||||||
|
|||||||
@@ -84,7 +84,12 @@ export default class Math extends Node {
|
|||||||
|
|
||||||
/**
 * Serialize the math node as `$...$`.
 *
 * @param state the markdown serializer state to write into.
 * @param node the math node whose text content is emitted.
 */
toMarkdown(state: MarkdownSerializerState, node: ProsemirrorNode) {
  state.write("$");

  // Within a table an unescaped pipe would be read as a cell delimiter, so
  // every pipe in the math source is backslash escaped in that context only.
  let source = node.textContent;
  if (state.inTable) {
    source = source.replace(/\|/g, "\\|");
  }

  state.text(source, false);
  state.write("$");
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,209 @@ const BR_TAG_REGEX = /<br\s*\/?>/gi;
|
|||||||
// Stops at <br> or newline to handle multiple checkboxes in a cell
|
// Stops at <br> or newline to handle multiple checkboxes in a cell
|
||||||
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;
|
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;
|
||||||
|
|
||||||
|
// A GFM table delimiter row consists only of pipes, dashes, colons and
// whitespace, e.g. "| --- | :--: |". This pattern enforces that character set
// and at least one dash; the presence of a pipe is checked separately by the
// caller.
|
||||||
|
const DELIMITER_ROW_REGEX = /^[\s|:-]*-[\s|:-]*$/;
|
||||||
|
|
||||||
|
/**
 * Escape unescaped pipe characters within a single line so that they survive
 * markdown-it's GFM table cell splitting. A pipe that is immediately preceded
 * by a backslash is left untouched to avoid double escaping.
 *
 * The pattern deliberately avoids a regex lookbehind: engines without
 * lookbehind support (e.g. Safari before 16.4) reject such a literal with a
 * SyntaxError when the script is parsed.
 *
 * @param str the string to escape pipes within.
 * @returns the string with unescaped pipes backslash escaped.
 */
function escapePipes(str: string): string {
  // The optional capture swallows a directly preceding backslash; when it is
  // present the pipe is already escaped and the match is returned unchanged.
  return str.replace(/(\\?)\|/g, (match: string, backslash: string) =>
    backslash ? match : "\\|"
  );
}
|
||||||
|
|
||||||
|
/**
 * Locate the closing fence of an inline code span: the first run of
 * consecutive backticks, scanning from `from`, that is exactly `length`
 * characters long. Longer or shorter runs are skipped as a whole.
 *
 * @param line the line to search within.
 * @param from the index to begin searching from.
 * @param length the exact backtick run length to match.
 * @returns the index where the matching run starts, or -1 if none is found.
 */
function findClosingFence(line: string, from: number, length: number): number {
  let runStart = -1;

  // Iterate one position past the end so a run that touches the end of the
  // line is still terminated and measured.
  for (let pos = from; pos <= line.length; pos++) {
    const isBacktick = pos < line.length && line[pos] === "`";

    if (isBacktick) {
      if (runStart === -1) {
        runStart = pos;
      }
      continue;
    }

    if (runStart !== -1) {
      if (pos - runStart === length) {
        return runStart;
      }
      runStart = -1;
    }
  }

  return -1;
}
|
||||||
|
|
||||||
|
/**
 * Find the index of the closing inline math delimiter ("$") for an opening
 * delimiter at `openIdx`, so that currency-like text ("$5 | $10") is not
 * treated as math.
 *
 * The first unescaped "$" after the opening delimiter is the only closing
 * candidate: if it is not a valid closer the opening "$" is not math at all.
 * Searching on for a later closer would swallow cell delimiters, e.g. in
 * "| $5 | $x$ |" the span would become "5 | $x" and merge two cells.
 * NOTE(review): this assumes the inline math rule uses the markdown-it-katex
 * algorithm (first unescaped "$" decides) — confirm against the math rule.
 *
 * @param line the line to search within.
 * @param openIdx the index of the opening "$" delimiter.
 * @returns the index of the closing delimiter, or -1 if there is none.
 */
function findClosingMath(line: string, openIdx: number): number {
  const next = line[openIdx + 1];
  // Opening delimiter cannot be followed by whitespace, and must have content.
  if (next === undefined || next === " " || next === "\t") {
    return -1;
  }

  for (let i = openIdx + 2; i < line.length; i++) {
    if (line[i] !== "$") {
      continue;
    }

    // Count preceding backslashes — an odd count means the "$" is escaped and
    // therefore not a delimiter candidate.
    let backslashes = 0;
    for (let k = i - 1; k > openIdx && line[k] === "\\"; k--) {
      backslashes++;
    }
    if (backslashes % 2 !== 0) {
      continue;
    }

    // This is the first unescaped "$": it either closes the span or proves
    // that the opening "$" was literal text. A closing delimiter cannot be
    // preceded by whitespace or followed by a digit.
    const prev = line[i - 1];
    const after = line[i + 1];
    const prevIsSpace = prev === " " || prev === "\t";
    const afterIsDigit = after !== undefined && after >= "0" && after <= "9";
    return prevIsSpace || afterIsDigit ? -1 : i;
  }

  return -1;
}
|
||||||
|
|
||||||
|
/**
 * Backslash escape the pipes found inside inline code spans (`` `...` ``) and
 * inline math (`$...$`) of one table row. Pipes outside of such spans — the
 * actual cell delimiters — are copied through unchanged, so the row still
 * splits into the intended cells.
 *
 * @param line the table row to process.
 * @returns the row with span-internal pipes escaped.
 */
function escapeSpanPipesInRow(line: string): string {
  const pieces: string[] = [];
  let pos = 0;

  while (pos < line.length) {
    const current = line[pos];

    if (current === "\\") {
      // An existing escape: copy the backslash together with the character it
      // escapes so neither is interpreted as a span delimiter.
      pieces.push(line.slice(pos, pos + 2));
      pos += 2;
    } else if (current === "`") {
      // Code span: the opening run of backticks must be closed by a run of
      // exactly the same length.
      let afterRun = pos + 1;
      while (afterRun < line.length && line[afterRun] === "`") {
        afterRun++;
      }
      const ticks = line.slice(pos, afterRun);
      const closeAt = findClosingFence(line, afterRun, ticks.length);

      if (closeAt === -1) {
        // Unclosed run: literal backticks, keep scanning after them.
        pieces.push(ticks);
        pos = afterRun;
      } else {
        pieces.push(ticks, escapePipes(line.slice(afterRun, closeAt)), ticks);
        pos = closeAt + ticks.length;
      }
    } else if (current === "$") {
      // Inline math delimited by single dollar signs.
      const closeAt = findClosingMath(line, pos);

      if (closeAt === -1) {
        // No valid closing delimiter: a literal dollar sign.
        pieces.push(current);
        pos += 1;
      } else {
        pieces.push("$", escapePipes(line.slice(pos + 1, closeAt)), "$");
        pos = closeAt + 1;
      }
    } else {
      pieces.push(current);
      pos += 1;
    }
  }

  return pieces.join("");
}
|
||||||
|
|
||||||
|
/**
 * Escape pipe characters inside inline code spans and math within GFM tables.
 *
 * markdown-it's table block rule splits cells on the pipe character and is
 * unaware of inline code spans and math, so an unescaped pipe inside `` `...` ``
 * or `$...$` incorrectly fragments — or even truncates — the cell. Escaping
 * those pipes before block tokenization lets the table split into the right
 * cells; the backslash is stripped again when each cell's content is re-parsed
 * inline, so the code/math content is preserved exactly.
 *
 * Lines inside fenced code blocks (``` or ~~~) are left untouched: their
 * content is literal, so a table-looking sample inside a fence must not gain
 * backslashes. Only fences indented by at most three spaces are tracked;
 * fences nested deeper inside containers such as list items are not
 * recognized.
 *
 * @param src the full markdown source.
 * @returns the source with span-internal pipes within tables escaped.
 */
export function escapeTableSpanPipes(src: string): string {
  if (!src.includes("|")) {
    return src;
  }

  // Opening fence: up to three spaces of indentation followed by at least
  // three backticks or tildes. A closing fence additionally allows only
  // trailing whitespace after the marker run.
  const fenceOpenRegex = /^ {0,3}(`{3,}|~{3,})/;
  const fenceCloseRegex = /^ {0,3}(`{3,}|~{3,})\s*$/;

  const lines = src.split("\n");
  // Marker run of the currently open fenced code block, if any.
  let openFence: string | undefined;

  for (let i = 0; i < lines.length - 1; i++) {
    const header = lines[i];

    if (openFence !== undefined) {
      // Inside a fenced code block: only look for the closing fence, which
      // uses the same marker character and is at least as long as the opener.
      const closing = fenceCloseRegex.exec(header);
      if (
        closing &&
        closing[1][0] === openFence[0] &&
        closing[1].length >= openFence.length
      ) {
        openFence = undefined;
      }
      continue;
    }

    const delimiter = lines[i + 1];

    // A table begins with a header row containing a pipe immediately followed
    // by a delimiter row that itself contains a pipe.
    const startsTable =
      header.includes("|") &&
      delimiter.includes("|") &&
      DELIMITER_ROW_REGEX.test(delimiter.trim());

    if (!startsTable) {
      const opening = fenceOpenRegex.exec(header);
      if (opening) {
        // The info string of a backtick fence may not contain backticks;
        // otherwise the line is an inline code span, not a fence.
        const info = header.slice(opening[0].length);
        if (opening[1][0] !== "`" || !info.includes("`")) {
          openFence = opening[1];
        }
      }
      continue;
    }

    // Escape the header and every body row until a blank line terminates the
    // table. The delimiter row itself never contains spans to escape.
    lines[i] = escapeSpanPipesInRow(header);
    let j = i + 2;
    while (j < lines.length && lines[j].trim() !== "") {
      lines[j] = escapeSpanPipesInRow(lines[j]);
      j++;
    }
    // Resume scanning at the line that ended the table.
    i = j - 1;
  }

  return lines.join("\n");
}
|
||||||
|
|
||||||
export default function markdownTables(md: MarkdownIt): void {
|
export default function markdownTables(md: MarkdownIt): void {
|
||||||
|
// Escape pipes inside code/math spans before the block table rule splits
|
||||||
|
// cells, so that those pipes are not mistaken for cell delimiters.
|
||||||
|
md.core.ruler.before("block", "tables-pm-escape", (state) => {
|
||||||
|
state.src = escapeTableSpanPipes(state.src);
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
// insert a new rule after the "inline" rules are parsed
|
// insert a new rule after the "inline" rules are parsed
|
||||||
md.core.ruler.after("inline", "tables-pm", (state) => {
|
md.core.ruler.after("inline", "tables-pm", (state) => {
|
||||||
const tokens = state.tokens;
|
const tokens = state.tokens;
|
||||||
|
|||||||
Reference in New Issue
Block a user