mirror of
https://github.com/outline/outline.git
synced 2026-06-13 03:14:59 +03:00
539 lines
16 KiB
TypeScript
539 lines
16 KiB
TypeScript
import type MarkdownIt from "markdown-it";
|
|
import type StateBlock from "markdown-it/lib/rules_block/state_block.mjs";
|
|
|
|
// ---------------------------------------------------------------------------
// Patterns
// ---------------------------------------------------------------------------

// A literal two-character `\n` sequence that is not itself escaped: it must
// sit at the very start of the text or follow any character but a backslash.
const BREAK_REGEX = /(?<=^|[^\\])\\n/;

// Any `<br>`, `<br/>` or `<br />` tag, matched case-insensitively and globally.
const BR_TAG_REGEX = /<br\s*\/?>/gi;

// Checkbox syntax with an optional list prefix: "- [x] Task" or "[x] Task".
// The label capture stops at the first "<" or newline, which lets several
// <br>-separated checkboxes share a single cell.
const CHECKBOX_REGEX = /^(?:-\s*)?\[(X|\s|_|-)\]\s([^<\n]*)?/i;

// ---------------------------------------------------------------------------
// Limits
// ---------------------------------------------------------------------------

// Ceiling on the running count of cells added to pad short body rows;
// tableRule stops consuming rows once the count exceeds it.
const MAX_AUTOCOMPLETED_CELLS = 0x10000;

// ---------------------------------------------------------------------------
// Character codes, in ascending order
// ---------------------------------------------------------------------------

const TAB = 0x09; // "\t"
const SPACE = 0x20; // " "
const DOLLAR = 0x24; // "$"
const HYPHEN = 0x2d; // "-"
const DIGIT_0 = 0x30; // "0"
const DIGIT_9 = 0x39; // "9"
const COLON = 0x3a; // ":"
const BACKSLASH = 0x5c; // "\"
const BACKTICK = 0x60; // "`"
const PIPE = 0x7c; // "|"
|
|
|
|
/**
 * Classify a character code as horizontal whitespace for the table scanner.
 *
 * @param code the UTF-16 code unit to classify.
 * @returns true for a space or a tab, false for anything else.
 */
function isSpace(code: number): boolean {
  switch (code) {
    case SPACE:
    case TAB:
      return true;
    default:
      return false;
  }
}
|
|
|
|
/**
 * Step over a backtick-delimited code span whose opening run begins at `pos`.
 *
 * The span is closed by the first later run of backticks that has exactly the
 * same length as the opening run; runs of any other length are treated as
 * span content.
 *
 * @param str the row text being scanned.
 * @param pos index of the first backtick of the opening run.
 * @returns the index just past the closing run when one exists, otherwise -1
 * (meaning the backticks at `pos` are literal text).
 */
function skipCodeSpan(str: string, pos: number): number {
  const end = str.length;

  // Length of the run of consecutive backticks that begins at `from`.
  const backtickRun = (from: number): number => {
    let cursor = from;
    while (cursor < end && str.charCodeAt(cursor) === BACKTICK) {
      cursor++;
    }
    return cursor - from;
  };

  const openerLength = backtickRun(pos);

  for (let cursor = pos + openerLength; cursor < end; ) {
    const closerLength = backtickRun(cursor);
    if (closerLength === 0) {
      // Ordinary character inside the candidate span.
      cursor++;
      continue;
    }
    cursor += closerLength;
    if (closerLength === openerLength) {
      return cursor;
    }
  }

  return -1;
}
|
|
|
|
/**
 * Step over an inline `$...$` math span whose opening `$` is at `pos`.
 *
 * Mirrors the opener/closer constraints used by the math inline rule so that
 * literal dollar amounts (e.g. "$5") do not accidentally swallow a cell:
 * the opener must be followed by a non-whitespace character, and a closer
 * must not be preceded by whitespace nor followed by a digit. A backslash
 * inside the span hides the character after it from this scan.
 *
 * @param str the row text being scanned.
 * @param pos index of the opening `$`.
 * @returns the index just past the closing `$` when a balanced span exists,
 * otherwise -1.
 */
function skipMathSpan(str: string, pos: number): number {
  const length = str.length;
  const isDigit = (code: number): boolean =>
    code >= DIGIT_0 && code <= DIGIT_9;

  // Opener check: there must be a following character and it must not be
  // whitespace.
  const first = pos + 1;
  if (first >= length || isSpace(str.charCodeAt(first))) {
    return -1;
  }

  for (let i = first; i < length; i++) {
    const code = str.charCodeAt(i);

    if (code === BACKSLASH) {
      // Together with the loop increment this jumps over the escaped char.
      i++;
      continue;
    }
    if (code !== DOLLAR) {
      continue;
    }

    // Closer check: previous char not whitespace, next char not a digit.
    if (isSpace(str.charCodeAt(i - 1))) {
      continue;
    }
    if (i + 1 < length && isDigit(str.charCodeAt(i + 1))) {
      continue;
    }
    return i + 1;
  }

  return -1;
}
|
|
|
|
/**
 * Break a table row into cells at every unescaped pipe, leaving alone any
 * pipe that sits inside an inline code span or an inline math span. This is a
 * superset of markdown-it's default behaviour, which only honours `\|`
 * escapes and therefore silently truncates cells whose content contains
 * literal `|` characters inside `` `...` `` or `$...$`.
 *
 * An escaped pipe (`\|`) stays in its cell with the backslash removed.
 *
 * @param str the row text to split.
 * @returns the cell strings in order; a leading or trailing pipe yields an
 * empty first or last entry.
 */
function escapedSplit(str: string): string[] {
  const cells: string[] = [];
  const length = str.length;

  // Text already committed to the cell currently being assembled.
  let pending = "";
  // Start of the slice of `str` not yet committed to `pending`.
  let segmentStart = 0;
  // Whether the previous character was a backslash.
  let afterBackslash = false;
  let index = 0;

  while (index < length) {
    const code = str.charCodeAt(index);

    // An unescaped backtick or dollar may open a span; when it does, jump
    // straight past the span so that pipes inside it are never examined.
    if (!afterBackslash) {
      let spanEnd = -1;
      if (code === BACKTICK) {
        spanEnd = skipCodeSpan(str, index);
      } else if (code === DOLLAR) {
        spanEnd = skipMathSpan(str, index);
      }
      if (spanEnd !== -1) {
        index = spanEnd;
        continue;
      }
    }

    if (code === PIPE) {
      if (afterBackslash) {
        // Escaped pipe: keep everything before the backslash, then resume the
        // uncommitted slice at the pipe so the backslash itself is dropped.
        pending += str.substring(segmentStart, index - 1);
        segmentStart = index;
      } else {
        cells.push(pending + str.substring(segmentStart, index));
        pending = "";
        segmentStart = index + 1;
      }
    }

    afterBackslash = code === BACKSLASH;
    index++;
  }

  cells.push(pending + str.substring(segmentStart));
  return cells;
}
|
|
|
|
/**
 * Extract the text of one source line from the block state.
 *
 * @param state the block parser state holding the source and line offsets.
 * @param line index of the line to read.
 * @returns the slice of `state.src` from `bMarks[line] + tShift[line]` up to
 * (not including) `eMarks[line]`.
 */
function getLine(state: StateBlock, line: number): string {
  const { src, bMarks, tShift, eMarks } = state;
  const start = bMarks[line] + tShift[line];
  return src.slice(start, eMarks[line]);
}
|
|
|
|
/**
 * Split a header or body row into cells with {@link escapedSplit}, discarding
 * the empty first and last entries that a leading or trailing pipe produces.
 */
function splitTableRow(rowText: string): string[] {
  const cells = escapedSplit(rowText);
  if (cells.length && cells[0] === "") {
    cells.shift();
  }
  if (cells.length && cells[cells.length - 1] === "") {
    cells.pop();
  }
  return cells;
}

/**
 * Read the per-column alignment from a delimiter row such as `|:--|--:|`.
 *
 * @returns one entry per column ("left", "right", "center" or "" for none),
 * or null when the row is not a valid delimiter row.
 */
function parseDelimiterRow(rowText: string): string[] | null {
  const segments = rowText.split("|");
  const lastIndex = segments.length - 1;
  const aligns: string[] = [];

  for (let i = 0; i <= lastIndex; i++) {
    const marker = segments[i].trim();

    if (!marker) {
      // Empty segments are only tolerated at the edges (` |---| `), never
      // between two columns (` ---||--- `).
      if (i === 0 || i === lastIndex) {
        continue;
      }
      return null;
    }
    if (!/^:?-+:?$/.test(marker)) {
      return null;
    }

    const colonLeft = marker.charCodeAt(0) === COLON;
    const colonRight = marker.charCodeAt(marker.length - 1) === COLON;
    if (colonRight) {
      aligns.push(colonLeft ? "center" : "right");
    } else if (colonLeft) {
      aligns.push("left");
    } else {
      aligns.push("");
    }
  }

  return aligns;
}

/**
 * GFM table block rule, forked from markdown-it. The only behavioural change
 * from the upstream rule is that {@link escapedSplit} also recognises
 * backtick-delimited code spans and `$...$` math spans, so pipes inside such
 * spans no longer split the row.
 *
 * @param state block parser state; tokens are pushed onto it and `line` is
 * advanced when a table is consumed.
 * @param startLine index of the candidate header row.
 * @param endLine index one past the last line available to this rule.
 * @param silent when true, only report whether a table starts here without
 * emitting tokens or mutating the state.
 * @returns true when a table was recognised at `startLine`.
 */
function tableRule(
  state: StateBlock,
  startLine: number,
  endLine: number,
  silent: boolean
): boolean {
  // A table needs at least a header row and a delimiter row.
  if (startLine + 2 > endLine) {
    return false;
  }

  const delimiterLine = startLine + 1;
  const delimiterIndent = state.sCount[delimiterLine] - state.blkIndent;
  // Reject a delimiter row that is outdented from the current block, or
  // indented by four or more columns.
  if (delimiterIndent < 0 || delimiterIndent >= 4) {
    return false;
  }

  // The delimiter row must start with "|", "-" or ":" and may otherwise only
  // contain those characters plus spaces and tabs.
  const lineEnd = state.eMarks[delimiterLine];
  let pos = state.bMarks[delimiterLine] + state.tShift[delimiterLine];
  if (pos >= lineEnd) {
    return false;
  }

  const isDelimiterMark = (code: number): boolean =>
    code === PIPE || code === HYPHEN || code === COLON;

  const firstCh = state.src.charCodeAt(pos++);
  if (!isDelimiterMark(firstCh)) {
    return false;
  }
  if (pos >= lineEnd) {
    return false;
  }

  const secondCh = state.src.charCodeAt(pos++);
  if (!isDelimiterMark(secondCh) && !isSpace(secondCh)) {
    return false;
  }
  // A hyphen followed by whitespace is rejected here.
  if (firstCh === HYPHEN && isSpace(secondCh)) {
    return false;
  }

  for (; pos < lineEnd; pos++) {
    const ch = state.src.charCodeAt(pos);
    if (!isDelimiterMark(ch) && !isSpace(ch)) {
      return false;
    }
  }

  const aligns = parseDelimiterRow(getLine(state, delimiterLine));
  if (aligns === null) {
    return false;
  }

  const headerText = getLine(state, startLine).trim();
  if (headerText.indexOf("|") === -1) {
    return false;
  }
  if (state.sCount[startLine] - state.blkIndent >= 4) {
    return false;
  }

  // The header row fixes the column count for the whole table and must agree
  // with the delimiter row; body rows are allowed to differ.
  const headerCells = splitTableRow(headerText);
  const columnCount = headerCells.length;
  if (columnCount === 0 || columnCount !== aligns.length) {
    return false;
  }

  if (silent) {
    return true;
  }

  const oldParentType = state.parentType;
  // The StateBlock type only knows the standard parent-type strings. Casting
  // keeps us in lockstep with the upstream rule.
  state.parentType = "table" as typeof state.parentType;

  // Body rows are terminated by the rules registered for "blockquote".
  const terminatorRules = state.md.block.ruler.getRules("blockquote");

  // Emit the open / inline / close triple for a single cell.
  const pushCell = (header: boolean, column: number, content: string): void => {
    const tag = header ? "th" : "td";
    const open = state.push(header ? "th_open" : "td_open", tag, 1);
    if (aligns[column]) {
      open.attrs = [["style", "text-align:" + aligns[column]]];
    }
    const inline = state.push("inline", "", 0);
    inline.content = content;
    inline.children = [];
    state.push(header ? "th_close" : "td_close", tag, -1);
  };

  const tableLines: [number, number] = [startLine, 0];
  state.push("table_open", "table", 1).map = tableLines;
  state.push("thead_open", "thead", 1).map = [startLine, startLine + 1];
  state.push("tr_open", "tr", 1).map = [startLine, startLine + 1];

  headerCells.forEach((cell, column) => {
    pushCell(true, column, cell.trim());
  });

  state.push("tr_close", "tr", -1);
  state.push("thead_close", "thead", -1);

  let tBodyLines: [number, number] | undefined;
  let autocompletedCells = 0;
  let nextLine = startLine + 2;

  for (; nextLine < endLine; nextLine++) {
    if (state.sCount[nextLine] < state.blkIndent) {
      break;
    }

    // Stop as soon as any terminator rule claims this line (silent probe).
    const rowLine = nextLine;
    if (terminatorRules.some((rule) => rule(state, rowLine, endLine, true))) {
      break;
    }

    const rowText = getLine(state, nextLine).trim();
    if (!rowText) {
      break;
    }
    if (state.sCount[nextLine] - state.blkIndent >= 4) {
      break;
    }

    const rowCells = splitTableRow(rowText);

    // Track how many cells had to be synthesised for short rows. The running
    // total can go down when a row has more cells than the header.
    autocompletedCells += columnCount - rowCells.length;
    if (autocompletedCells > MAX_AUTOCOMPLETED_CELLS) {
      break;
    }

    if (nextLine === startLine + 2) {
      tBodyLines = [startLine + 2, 0];
      state.push("tbody_open", "tbody", 1).map = tBodyLines;
    }

    state.push("tr_open", "tr", 1).map = [nextLine, nextLine + 1];

    // Always emit exactly `columnCount` cells: missing ones become empty and
    // surplus ones are ignored.
    for (let column = 0; column < columnCount; column++) {
      const raw = rowCells[column];
      pushCell(false, column, raw ? raw.trim() : "");
    }

    state.push("tr_close", "tr", -1);
  }

  if (tBodyLines) {
    state.push("tbody_close", "tbody", -1);
    tBodyLines[1] = nextLine;
  }

  state.push("table_close", "table", -1);
  tableLines[1] = nextLine;

  state.parentType = oldParentType;
  state.line = nextLine;
  return true;
}
|
|
|
|
/**
 * markdown-it plugin that installs the custom GFM table rule and then
 * reshapes the resulting token stream for the ProseMirror parser.
 *
 * It registers two things on `md`:
 *
 * 1. `tableRule` in place of the built-in "table" block rule.
 * 2. A core rule named "tables-pm", run after "inline", which walks the token
 *    list backwards and
 *    - turns unescaped literal `\n` sequences and `<br>` tags found in inline
 *      children into "br" tokens (math and inline-code children are kept
 *      untouched),
 *    - removes thead/tbody wrapper tokens and lowers the level of the tokens
 *      that were nested inside them,
 *    - copies the alignment out of the cell's `style` attribute into
 *      `token.info`,
 *    - replaces cells that contain checkbox syntax with a checkbox list, and
 *      wraps every other cell's content in a paragraph.
 *
 * @param md the markdown-it instance to extend.
 */
export default function markdownTables(md: MarkdownIt): void {
  // Replace the built-in GFM table rule with one that ignores pipes inside
  // inline code and math spans when splitting cells. Without this, content
  // such as `` `|a-b|` `` would silently split a row into extra cells and
  // truncate the document on round-trip.
  md.block.ruler.at("table", tableRule, { alt: ["paragraph", "reference"] });

  // Hoisted so the arrays are not rebuilt for every token.
  const sectionTokenTypes = [
    "thead_open",
    "thead_close",
    "tbody_open",
    "tbody_close",
  ];
  const cellOpenTokenTypes = ["th_open", "td_open"];

  // insert a new rule after the "inline" rules are parsed
  md.core.ruler.after("inline", "tables-pm", (state) => {
    const tokens = state.tokens;
    // True while the backwards walk is between a thead/tbody close token and
    // its matching open token.
    let inside = false;

    // Walk backwards so that splices at or after `i` never disturb the part
    // of the list that is still to be visited.
    for (let i = tokens.length - 1; i > 0; i--) {
      const token = tokens[i];

      if (inside) {
        // The wrapper around this token is being removed, so it moves up one
        // nesting level.
        token.level--;
      }

      // convert unescaped \n and <br> tags in the text into real br tokens
      if (
        token.type === "inline" &&
        (token.content.match(BREAK_REGEX) || token.content.match(BR_TAG_REGEX))
      ) {
        const rebuilt: InstanceType<typeof state.Token>[] = [];

        for (const child of token.children || []) {
          // Skip processing math content to preserve LaTeX escape sequences
          if (child.type === "math_inline") {
            rebuilt.push(child);
            continue;
          }

          // a schema agnostic way to know if a node is inline code would be
          // great, for now we are stuck checking the node type.
          const isInlineCode = child.type === "code_inline";

          // Normalise <br> tags to the literal `\n` marker first so a single
          // split handles both forms.
          const content = isInlineCode
            ? child.content
            : child.content.replace(BR_TAG_REGEX, "\\n");
          const breakParts = content.split(BREAK_REGEX);

          if (isInlineCode || breakParts.length <= 1) {
            rebuilt.push(child);
            continue;
          }

          breakParts.forEach((part, index) => {
            const textToken = new state.Token("text", "", 1);
            textToken.content = part.trim();
            rebuilt.push(textToken);

            if (index < breakParts.length - 1) {
              rebuilt.push(new state.Token("br", "br", 1));
            }
          });
        }

        token.children = rebuilt;
      }

      // filter out incompatible tokens from markdown-it that we don't need
      // in prosemirror. thead/tbody do nothing.
      if (sectionTokenTypes.includes(token.type)) {
        inside = !inside;
        tokens.splice(i, 1);
        // Slot `i` now holds a token that was already visited (or nothing at
        // all when the removed token was the last one), so there is nothing
        // further to do for this index.
        continue;
      }

      if (!cellOpenTokenTypes.includes(token.type)) {
        continue;
      }

      // markdown-it table parser stores alignment as html styles, convert
      // to a simple string here
      const tokenAttrs = token.attrs;
      if (tokenAttrs) {
        const style = tokenAttrs[0][1];
        token.info = style.split(":")[1];
      }

      // Find the corresponding close token. The search starts directly after
      // the open token so that a cell with no inline token between open and
      // close still resolves to its own close token rather than a later one.
      const closeType = token.type === "th_open" ? "th_close" : "td_close";
      let closeIndex = i + 1;
      while (
        closeIndex < tokens.length &&
        tokens[closeIndex].type !== closeType
      ) {
        closeIndex++;
      }

      // Check if the cell content looks like a checkbox (or multiple
      // checkboxes), one per <br>-separated segment.
      // NOTE(review): when at least one segment matches, segments that do not
      // match are not carried into the output. Confirm this is intended.
      const inlineToken = tokens[i + 1];
      const checkboxItems: Array<{ checked: boolean; label: string }> = [];

      if (inlineToken?.type === "inline") {
        for (const part of inlineToken.content.split(BR_TAG_REGEX)) {
          const match = part.trim().match(CHECKBOX_REGEX);
          if (match) {
            checkboxItems.push({
              checked: match[1].toLowerCase() === "x",
              label: match[2] || "",
            });
          }
        }
      }

      if (checkboxItems.length === 0) {
        // markdown-it table parser does not return paragraphs inside the cells
        // but prosemirror requires them, so we add 'em in here. This also
        // covers a cell that has no inline token at all.
        // Insert the closing token first so that `closeIndex` is still valid.
        tokens.splice(
          closeIndex,
          0,
          new state.Token("paragraph_close", "p", -1)
        );
        tokens.splice(i + 1, 0, new state.Token("paragraph_open", "p", 1));
        continue;
      }

      // Build tokens for all checkbox items
      const newTokens: InstanceType<typeof state.Token>[] = [];
      newTokens.push(new state.Token("checkbox_list_open", "ul", 1));

      for (const item of checkboxItems) {
        const itemOpen = new state.Token("checkbox_item_open", "li", 1);
        if (item.checked) {
          itemOpen.attrs = [["checked", "true"]];
        }
        newTokens.push(itemOpen);
        newTokens.push(new state.Token("paragraph_open", "p", 1));

        // The label is carried as a single plain text child.
        const labelInline = new state.Token("inline", "", 0);
        labelInline.content = item.label;
        const textToken = new state.Token("text", "", 0);
        textToken.content = item.label;
        labelInline.children = [textToken];
        newTokens.push(labelInline);

        newTokens.push(new state.Token("paragraph_close", "p", -1));
        newTokens.push(new state.Token("checkbox_item_close", "li", -1));
      }

      newTokens.push(new state.Token("checkbox_list_close", "ul", -1));

      // Replace everything between the cell's open and close tokens with the
      // checkbox list.
      tokens.splice(i + 1, closeIndex - i - 1, ...newTokens);
    }

    return false;
  });
}
|