diff --git a/app/models/Document.ts b/app/models/Document.ts index 1ce0cfeb6b..f699057a0c 100644 --- a/app/models/Document.ts +++ b/app/models/Document.ts @@ -203,6 +203,9 @@ export default class Document extends ArchivableModel implements Searchable { @observable publishedAt: string | undefined; + @observable + popularityScore: number; + /** * @deprecated Use path instead */ diff --git a/app/scenes/Collection/index.tsx b/app/scenes/Collection/index.tsx index 5428b50c97..2a9fdc0521 100644 --- a/app/scenes/Collection/index.tsx +++ b/app/scenes/Collection/index.tsx @@ -57,6 +57,7 @@ const ShareButton = lazyWithRetry(() => import("./components/ShareButton")); enum CollectionPath { Overview = "overview", Recent = "recent", + Popular = "popular", Updated = "updated", Published = "published", Old = "old", @@ -242,6 +243,9 @@ const CollectionScene = observer(function _CollectionScene() { {t("Documents")} {!collection.isArchived && ( <> + + {t("Popular")} + {t("Recently updated")} @@ -353,6 +357,21 @@ const CollectionScene = observer(function _CollectionScene() { }} /> + + + {t("Recently viewed")} + + {t("Popular")} + {t("Recently updated")} @@ -68,7 +71,20 @@ function Home() { {t("Weird, this shouldn’t ever be empty")}} + empty={{t("Weird, this shouldn't ever be empty")}} + showCollection + /> + + + + {t("Documents with recent activity will appear here")} + + } showCollection /> diff --git a/app/stores/DocumentsStore.ts b/app/stores/DocumentsStore.ts index 9560eefe17..b1cb2f126d 100644 --- a/app/stores/DocumentsStore.ts +++ b/app/stores/DocumentsStore.ts @@ -96,6 +96,11 @@ export default class DocumentsStore extends Store { return orderBy(this.all, "updatedAt", "desc"); } + @computed + get popular(): Document[] { + return orderBy(this.all, "popularityScore", "desc"); + } + @computed get templates(): Document[] { return orderBy( @@ -208,6 +213,10 @@ export default class DocumentsStore extends Store { return naturalSort(this.inCollection(collectionId), "title"); } + 
popularInCollection(collectionId: string): Document[] { + return orderBy(this.inCollection(collectionId), "popularityScore", "desc"); + } + get(id: string): Document | undefined { return id ? (this.data.get(id) ?? @@ -386,6 +395,14 @@ export default class DocumentsStore extends Store { options?: PaginationParams ): Promise => this.fetchNamedPage("viewed", options); + @action + fetchPopular = async (options?: PaginationParams): Promise => + this.fetchNamedPage("list", { + sort: "popularityScore", + direction: "DESC", + ...options, + }); + @action fetchStarred = (options?: PaginationParams): Promise => this.fetchNamedPage("starred", options); diff --git a/server/migrations/20251125012929-add-popularity-score-to-documents.js b/server/migrations/20251125012929-add-popularity-score-to-documents.js new file mode 100644 index 0000000000..35bca8e297 --- /dev/null +++ b/server/migrations/20251125012929-add-popularity-score-to-documents.js @@ -0,0 +1,16 @@ +"use strict"; + +/** @type {import('sequelize-cli').Migration} */ +module.exports = { + async up(queryInterface, Sequelize) { + await queryInterface.addColumn("documents", "popularityScore", { + type: Sequelize.FLOAT, + allowNull: false, + defaultValue: 0, + }); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.removeColumn("documents", "popularityScore"); + }, +}; diff --git a/server/models/Document.ts b/server/models/Document.ts index e945c45d32..51df6d9a1f 100644 --- a/server/models/Document.ts +++ b/server/models/Document.ts @@ -40,6 +40,7 @@ import { BelongsToMany, Unique, AfterUpdate, + IsFloat, } from "sequelize-typescript"; import { MaxLength } from "class-validator"; import isUUID from "validator/lib/isUUID"; @@ -382,6 +383,13 @@ class Document extends ArchivableModel< @Column(DataType.INTEGER) revisionCount: number; + /** A score representing the popularity of this document based on views and engagement. 
*/ + @IsFloat + @Default(0) + @Column(DataType.FLOAT) + @SkipChangeset + popularityScore: number; + /** Whether the document is published, and if so when. */ @IsDate @Column diff --git a/server/models/helpers/SearchHelper.ts b/server/models/helpers/SearchHelper.ts index 02d023a5fe..fb4d4d6b40 100644 --- a/server/models/helpers/SearchHelper.ts +++ b/server/models/helpers/SearchHelper.ts @@ -483,9 +483,11 @@ export default class SearchHelper { const order: Order = [["updatedAt", "DESC"]]; if (query) { + // Combine text relevance with logarithmic popularity boost + // Popular documents get a boost, but text relevance remains primary attributes.push([ Sequelize.literal( - `ts_rank("searchVector", to_tsquery('english', :query))` + `ts_rank("searchVector", to_tsquery('english', :query)) * (1 + LN(1 + COALESCE("popularityScore", 0)))` ), "searchRanking", ]); diff --git a/server/presenters/document.ts b/server/presenters/document.ts index 8d358a5ffb..3b845c6fe5 100644 --- a/server/presenters/document.ts +++ b/server/presenters/document.ts @@ -94,6 +94,7 @@ async function presentDocument( res.templateId = document.templateId; res.template = document.template; res.insightsEnabled = document.insightsEnabled; + res.popularityScore = document.popularityScore; res.sourceMetadata = document.sourceMetadata ? { importedAt: source?.createdAt ?? document.createdAt, diff --git a/server/queues/tasks/UpdateDocumentsPopularityScoreTask.ts b/server/queues/tasks/UpdateDocumentsPopularityScoreTask.ts new file mode 100644 index 0000000000..38dc7f1d53 --- /dev/null +++ b/server/queues/tasks/UpdateDocumentsPopularityScoreTask.ts @@ -0,0 +1,370 @@ +import crypto from "crypto"; +import { subWeeks } from "date-fns"; +import { QueryTypes } from "sequelize"; +import Logger from "@server/logging/Logger"; +import BaseTask, { TaskSchedule } from "./BaseTask"; +import { sequelize } from "@server/storage/database"; + +type Props = Record; + +/** + * Gravity constant for time decay. 
Higher values cause faster decay of older content. + * With `GRAVITY = 0.7` and the 2 hour `TIME_OFFSET_HOURS`, relative to brand new activity: + * - Content from **1 day ago** retains ~17% of its score + * - Content from **3 days ago** retains ~8% of its score + * - Content from **1 week ago** retains ~4% of its score + * - Content from **2 weeks ago** retains ~3% of its score + */ +const GRAVITY = 0.7; + +/** + * Number of hours to add to age to smooth the decay curve, + * preventing brand new content from having disproportionately + * high scores compared to content just a few hours old. + */ +const TIME_OFFSET_HOURS = 2; + +/** + * Weight multipliers for different activity types relative to base score + */ +const ACTIVITY_WEIGHTS = { + revision: 1.0, + comment: 1.2, + view: 0.5, +}; + +/** + * Only recalculate scores for activity within this period. + */ +const ACTIVITY_THRESHOLD_WEEKS = 2; + +/** + * Batch size for processing updates - each batch is an independent transaction + */ +const BATCH_SIZE = 500; + +/** + * Maximum retries for failed batch operations + */ +const MAX_RETRIES = 3; + +/** + * Delay between retries in milliseconds + */ +const RETRY_DELAY_MS = 1000; + +/** + * Base name for the working table used to track documents to process + */ +const WORKING_TABLE_PREFIX = "popularity_score_working"; + +export default class UpdateDocumentsPopularityScoreTask extends BaseTask { + /** + * Unique table name for this task run to prevent conflicts with concurrent runs + */ + private workingTable: string = ""; + static cron = TaskSchedule.Day; + + public async perform() { + Logger.info("task", "Updating document popularity scores…"); + + const now = new Date(); + const activityThreshold = subWeeks(now, ACTIVITY_THRESHOLD_WEEKS); + + // Generate unique table name for this run to prevent conflicts + const uniqueId = crypto.randomBytes(8).toString("hex"); + this.workingTable = `${WORKING_TABLE_PREFIX}_${uniqueId}`; + + try { + // Setup: Create working table and populate with active document IDs + await 
this.setupWorkingTable(activityThreshold); + + const activeCount = await this.getWorkingTableCount(); + + if (activeCount === 0) { + Logger.info("task", "No documents with recent activity found"); + return; + } + + Logger.info( + "task", + `Found ${activeCount} documents with recent activity` + ); + + // Process documents in independent batches + let totalUpdated = 0; + let totalErrors = 0; + let batchNumber = 0; + + while (true) { + const remaining = await this.getWorkingTableCount(); + if (remaining === 0) { + break; + } + + batchNumber++; + + try { + const updated = await this.processBatchWithRetry( + activityThreshold, + now + ); + totalUpdated += updated; + + Logger.debug( + "task", + `Batch ${batchNumber}: updated ${updated} documents, ${remaining - updated} remaining` + ); + } catch (error) { + totalErrors++; + Logger.error(`Batch ${batchNumber} failed after retries`, error); + + // Mark failed batch as processed in working table to prevent infinite loop + await this.skipCurrentBatch(); + } + } + + Logger.info( + "task", + `Completed updating popularity scores: ${totalUpdated} documents updated, ${totalErrors} batch errors` + ); + } catch (error) { + Logger.error("Failed to update document popularity scores", error); + throw error; + } finally { + // Always clean up the working table + await this.cleanupWorkingTable(); + } + } + + /** + * Creates an unlogged working table and populates it with document IDs + * that have recent activity. Unlogged tables are faster because they + * skip WAL logging, and data loss on crash is acceptable here. 
+ */ + private async setupWorkingTable(activityThreshold: Date): Promise { + // Drop any existing table first to avoid type conflicts from previous crashed runs + await sequelize.query(`DROP TABLE IF EXISTS ${this.workingTable} CASCADE`); + + // Create unlogged table - faster than regular tables as it skips WAL logging + await sequelize.query(` + CREATE UNLOGGED TABLE ${this.workingTable} ( + "documentId" UUID PRIMARY KEY, + processed BOOLEAN DEFAULT FALSE + ) + `); + + // Populate with documents that have recent activity and are valid + // (published, not deleted). Using EXISTS subqueries to filter upfront. + await sequelize.query( + ` + INSERT INTO ${this.workingTable} ("documentId") + SELECT DISTINCT d.id + FROM documents d + WHERE d."publishedAt" IS NOT NULL + AND d."deletedAt" IS NULL + AND ( + EXISTS ( + SELECT 1 FROM revisions r + WHERE r."documentId" = d.id AND r."createdAt" >= :threshold + ) + OR EXISTS ( + SELECT 1 FROM comments c + WHERE c."documentId" = d.id AND c."createdAt" >= :threshold + ) + OR EXISTS ( + SELECT 1 FROM views v + WHERE v."documentId" = d.id AND v."updatedAt" >= :threshold + ) + ) + `, + { replacements: { threshold: activityThreshold } } + ); + + // Create index on processed column for efficient batch selection + await sequelize.query(` + CREATE INDEX ON ${this.workingTable} (processed) WHERE NOT processed + `); + } + + /** + * Returns count of unprocessed documents in working table + */ + private async getWorkingTableCount(): Promise { + const [result] = await sequelize.query<{ count: string }>( + `SELECT COUNT(*) as count FROM ${this.workingTable} WHERE NOT processed`, + { type: QueryTypes.SELECT } + ); + return parseInt(result.count, 10); + } + + /** + * Processes a batch of documents with retry logic. + * Each batch is an independent transaction that commits on success. 
+ */ + private async processBatchWithRetry( + activityThreshold: Date, + now: Date, + attempt = 1 + ): Promise { + try { + return await sequelize.transaction(async (transaction) => { + // Select and lock a batch of unprocessed documents + const batch = await sequelize.query<{ documentId: string }>( + ` + SELECT "documentId" FROM ${this.workingTable} + WHERE NOT processed + ORDER BY "documentId" + LIMIT :limit + FOR UPDATE SKIP LOCKED + `, + { + replacements: { limit: BATCH_SIZE }, + type: QueryTypes.SELECT, + transaction, + } + ); + + if (batch.length === 0) { + return 0; + } + + const documentIds = batch.map((b) => b.documentId); + + // Build VALUES clause for the batch; Sequelize did not like array parameters in a casted IN clause. + const valuesClause = documentIds + .map((id) => `('${id}'::uuid)`) + .join(", "); + + // Calculate and update scores using JOINs (no IN clause with large arrays) + await sequelize.query( + ` + WITH batch_docs AS ( + SELECT * FROM (VALUES ${valuesClause}) AS t(id) + ), + revision_scores AS ( + SELECT + r."documentId", + SUM(:revisionWeight / POWER( + GREATEST(EXTRACT(EPOCH FROM (:now::timestamp - r."createdAt")) / 3600 + :timeOffset, 0.1), + :gravity + )) as score + FROM revisions r + INNER JOIN batch_docs bd ON r."documentId" = bd.id + WHERE r."createdAt" >= :threshold + GROUP BY r."documentId" + ), + comment_scores AS ( + SELECT + c."documentId", + SUM(:commentWeight / POWER( + GREATEST(EXTRACT(EPOCH FROM (:now::timestamp - c."createdAt")) / 3600 + :timeOffset, 0.1), + :gravity + )) as score + FROM comments c + INNER JOIN batch_docs bd ON c."documentId" = bd.id + WHERE c."createdAt" >= :threshold + GROUP BY c."documentId" + ), + view_scores AS ( + SELECT + v."documentId", + SUM(:viewWeight / POWER( + GREATEST(EXTRACT(EPOCH FROM (:now::timestamp - v."updatedAt")) / 3600 + :timeOffset, 0.1), + :gravity + )) as score + FROM views v + INNER JOIN batch_docs bd ON v."documentId" = bd.id + WHERE v."updatedAt" >= :threshold + GROUP BY 
v."documentId" + ), + combined_scores AS ( + SELECT + bd.id as "documentId", + COALESCE(rs.score, 0) + COALESCE(cs.score, 0) + COALESCE(vs.score, 0) as total_score + FROM batch_docs bd + LEFT JOIN revision_scores rs ON bd.id = rs."documentId" + LEFT JOIN comment_scores cs ON bd.id = cs."documentId" + LEFT JOIN view_scores vs ON bd.id = vs."documentId" + ) + UPDATE documents + SET "popularityScore" = combined_scores.total_score + FROM combined_scores + WHERE documents.id = combined_scores."documentId" + `, + { + replacements: { + threshold: activityThreshold, + now, + gravity: GRAVITY, + timeOffset: TIME_OFFSET_HOURS, + revisionWeight: ACTIVITY_WEIGHTS.revision, + commentWeight: ACTIVITY_WEIGHTS.comment, + viewWeight: ACTIVITY_WEIGHTS.view, + }, + transaction, + } + ); + + // Mark batch as processed + await sequelize.query( + ` + UPDATE ${this.workingTable} + SET processed = TRUE + WHERE "documentId" IN (SELECT id FROM (VALUES ${valuesClause}) AS t(id)) + `, + { transaction } + ); + + return documentIds.length; + }); + } catch (error) { + if (attempt < MAX_RETRIES) { + Logger.warn( + `Batch update failed, retrying (attempt ${attempt + 1}/${MAX_RETRIES})`, + { error } + ); + await this.sleep(RETRY_DELAY_MS * attempt); + return this.processBatchWithRetry(activityThreshold, now, attempt + 1); + } + throw error; + } + } + + /** + * Marks current batch as processed without updating scores. + * Used when a batch fails repeatedly to prevent infinite loops. 
+ */ + private async skipCurrentBatch(): Promise { + await sequelize.query( + ` + UPDATE ${this.workingTable} + SET processed = TRUE + WHERE "documentId" IN ( + SELECT "documentId" FROM ${this.workingTable} + WHERE NOT processed + ORDER BY "documentId" + LIMIT :limit + ) + `, + { replacements: { limit: BATCH_SIZE } } + ); + } + + /** + * Removes the working table + */ + private async cleanupWorkingTable(): Promise { + try { + await sequelize.query( + `DROP TABLE IF EXISTS ${this.workingTable} CASCADE` + ); + } catch (error) { + Logger.warn("Failed to clean up working table", { error }); + } + } + + private sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} diff --git a/server/routes/api/documents/schema.ts b/server/routes/api/documents/schema.ts index fca315f0f6..9b6f132d52 100644 --- a/server/routes/api/documents/schema.ts +++ b/server/routes/api/documents/schema.ts @@ -13,7 +13,14 @@ const DocumentsSortParamsSchema = z.object({ sort: z .string() .refine((val) => - ["createdAt", "updatedAt", "publishedAt", "index", "title"].includes(val) + [ + "createdAt", + "updatedAt", + "publishedAt", + "index", + "title", + "popularityScore", + ].includes(val) ) .default("updatedAt"), diff --git a/shared/i18n/locales/en_US/translation.json b/shared/i18n/locales/en_US/translation.json index daa801a50e..cac6c6f613 100644 --- a/shared/i18n/locales/en_US/translation.json +++ b/shared/i18n/locales/en_US/translation.json @@ -666,6 +666,7 @@ "Sorry, an error occurred saving the collection": "Sorry, an error occurred saving the collection", "Add a description": "Add a description", "Overview": "Overview", + "Popular": "Popular", "Recently updated": "Recently updated", "Recently published": "Recently published", "Least recently updated": "Least recently updated", @@ -783,7 +784,8 @@ "Something went wrong": "Something went wrong", "Sorry, an unknown error occurred loading the page. 
Please try again or contact support if the issue persists.": "Sorry, an unknown error occurred loading the page. Please try again or contact support if the issue persists.", "Created by me": "Created by me", - "Weird, this shouldn’t ever be empty": "Weird, this shouldn’t ever be empty", + "Weird, this shouldn't ever be empty": "Weird, this shouldn't ever be empty", + "Documents with recent activity will appear here": "Documents with recent activity will appear here", "You haven’t created any documents yet": "You haven’t created any documents yet", "Documents you’ve recently viewed will be here for easy access": "Documents you’ve recently viewed will be here for easy access", "{{ count }} invites sent": "{{ count }} invites sent",