chore: Refactor Markdown importer to use new import pipeline (#12361)

* chore: Refactor Markdown importer to use new import pipeline

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Tom Moor
2026-05-16 14:10:15 -04:00
committed by GitHub
parent 170f59c6ba
commit 82d7041b6b
20 changed files with 1224 additions and 508 deletions
@@ -30,6 +30,10 @@ export class NotionImportsProcessor extends ImportsProcessor<IntegrationService.
importModel: Import<IntegrationService.Notion>,
transaction: Transaction
): Promise<NotionImportTaskInput> {
if (!importModel.integrationId) {
throw new Error("Notion import is missing integrationId");
}
const integration = await Integration.scope("withAuthentication").findByPk(
importModel.integrationId,
{ rejectOnEmpty: true }
@@ -29,15 +29,19 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
];
/**
* Process the Notion import task.
* Process a Notion page-phase import task.
* This fetches data from Notion and converts it to task output.
*
* @param importTask ImportTask model to process.
* @returns Promise with output that resolves once processing has completed.
*/
protected async process(
protected async processPage(
importTask: ImportTask<IntegrationService.Notion>
): Promise<ProcessOutput<IntegrationService.Notion>> {
if (!importTask.import.integrationId) {
throw new Error("Notion import is missing integrationId");
}
const integration = await Integration.scope("withAuthentication").findByPk(
importTask.import.integrationId,
{ rejectOnEmpty: true }
@@ -47,7 +51,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
const parsedPages: (ParsePageOutput | null)[] = [];
for (const item of importTask.input) {
parsedPages.push(await this.processPage({ item, client }));
parsedPages.push(await this.parsePage({ item, client }));
}
// Filter out any null results (from pages/databases that couldn't be accessed)
@@ -56,7 +60,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
const taskOutput: ImportTaskOutput = validParsedPages.map((parsedPage) => ({
externalId: parsedPage.externalId,
title: parsedPage.title,
emoji: parsedPage.emoji,
icon: parsedPage.icon,
content: parsedPage.content,
author: parsedPage.author,
createdAt: parsedPage.createdAt,
@@ -96,7 +100,7 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
* @param client Notion client.
* @returns Promise of parsed page output that resolves when the task is scheduled.
*/
private async processPage({
private async parsePage({
item,
client,
}: {
@@ -112,13 +116,14 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
try {
// Convert Notion database to an empty page with "pages in database" as its children.
if (item.type === PageType.Database) {
const { pages, ...databaseInfo } = await client.fetchDatabase(
const { pages, emoji, ...databaseInfo } = await client.fetchDatabase(
item.externalId,
{ titleMaxLength }
);
return {
...databaseInfo,
icon: emoji,
externalId: item.externalId,
content: ProsemirrorHelper.getEmptyDocument() as ProsemirrorDoc,
collectionExternalId,
@@ -129,12 +134,14 @@ export default class NotionAPIImportTask extends APIImportTask<IntegrationServic
};
}
const { blocks, ...pageInfo } = await client.fetchPage(item.externalId, {
titleMaxLength,
});
const { blocks, emoji, ...pageInfo } = await client.fetchPage(
item.externalId,
{ titleMaxLength }
);
return {
...pageInfo,
icon: emoji,
externalId: item.externalId,
content: NotionConverter.page({ children: blocks } as NotionPage),
collectionExternalId,