From 600108bc436ee7671475d033b15221747dd553e4 Mon Sep 17 00:00:00 2001 From: Tom Moor Date: Sat, 18 Apr 2026 08:11:15 -0400 Subject: [PATCH] feat: Document insight rollups (#12086) * First pass * Remove popularity changes * Address review feedback - Compute retention cutoff in UTC from the database rather than worker-local TZ - Push partition predicate into rollup source CTEs to avoid full-table scans per partition Co-Authored-By: Claude Opus 4.7 * Anchor insight rollups to UTC and include today Co-Authored-By: Claude Opus 4.7 --------- Co-authored-by: Claude Opus 4.7 --- ...20260416000000-create-document-insights.js | 96 +++++++ server/models/DocumentInsight.ts | 72 +++++ server/models/index.ts | 2 + server/presenters/documentInsight.ts | 13 + server/presenters/index.ts | 2 + ...CleanupExpiredDocumentInsightsTask.test.ts | 42 +++ .../CleanupExpiredDocumentInsightsTask.ts | 42 +++ .../tasks/RollupDocumentInsightsTask.test.ts | 247 ++++++++++++++++++ .../tasks/RollupDocumentInsightsTask.ts | 151 +++++++++++ server/routes/api/documents/documents.ts | 38 +++ server/routes/api/documents/schema.ts | 19 ++ ...260416000000-backfill-document-insights.ts | 129 +++++++++ server/test/factories.ts | 48 ++-- 13 files changed, 880 insertions(+), 21 deletions(-) create mode 100644 server/migrations/20260416000000-create-document-insights.js create mode 100644 server/models/DocumentInsight.ts create mode 100644 server/presenters/documentInsight.ts create mode 100644 server/queues/tasks/CleanupExpiredDocumentInsightsTask.test.ts create mode 100644 server/queues/tasks/CleanupExpiredDocumentInsightsTask.ts create mode 100644 server/queues/tasks/RollupDocumentInsightsTask.test.ts create mode 100644 server/queues/tasks/RollupDocumentInsightsTask.ts create mode 100644 server/scripts/20260416000000-backfill-document-insights.ts diff --git a/server/migrations/20260416000000-create-document-insights.js b/server/migrations/20260416000000-create-document-insights.js new file mode 100644 index 0000000000..794caec6fc --- /dev/null +++ b/server/migrations/20260416000000-create-document-insights.js @@ -0,0 +1,96 @@ +"use strict"; + +module.exports = { + async up(queryInterface, Sequelize) { + return queryInterface.sequelize.transaction(async (transaction) => { + await queryInterface.createTable( + "document_insights", + { + id: { + type: Sequelize.UUID, + allowNull: false, + defaultValue: Sequelize.UUIDV4, + primaryKey: true, + }, + documentId: { + type: Sequelize.UUID, + allowNull: false, + references: { + model: "documents", + key: "id", + }, + onDelete: "CASCADE", + }, + teamId: { + type: Sequelize.UUID, + allowNull: false, + references: { + model: "teams", + key: "id", + }, + onDelete: "CASCADE", + }, + date: { + type: Sequelize.DATEONLY, + allowNull: false, + }, + viewCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + viewerCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + commentCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + reactionCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + revisionCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + editorCount: { + type: Sequelize.INTEGER, + allowNull: false, + defaultValue: 0, + }, + createdAt: { + type: Sequelize.DATE, + allowNull: false, + }, + updatedAt: { + type: Sequelize.DATE, + allowNull: false, + }, + }, + { transaction } + ); + + await queryInterface.addIndex( + "document_insights", + ["documentId", "date"], + { unique: true, transaction } + ); + + await queryInterface.addIndex("document_insights", ["teamId", "date"], { + transaction, + }); + }); + }, + + async down(queryInterface) { + return queryInterface.sequelize.transaction(async (transaction) => { + await queryInterface.dropTable("document_insights", { transaction }); + }); + }, +}; diff --git a/server/models/DocumentInsight.ts b/server/models/DocumentInsight.ts new file mode 100644 index 0000000000..591527635b --- /dev/null +++ b/server/models/DocumentInsight.ts @@ -0,0 +1,72 @@ +import type { InferAttributes, InferCreationAttributes } from "sequelize"; +import { + BelongsTo, + Column, + DataType, + Default, + ForeignKey, + Table, +} from "sequelize-typescript"; +import Document from "./Document"; +import Team from "./Team"; +import IdModel from "./base/IdModel"; +import { SkipChangeset } from "./decorators/Changeset"; +import Fix from "./decorators/Fix"; + +@Table({ tableName: "document_insights", modelName: "documentInsight" }) +@Fix +class DocumentInsight extends IdModel< + InferAttributes, + Partial> +> { + @Column(DataType.DATEONLY) + date: string; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + viewCount: number; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + viewerCount: number; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + commentCount: number; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + reactionCount: number; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + revisionCount: number; + + @Default(0) + @Column(DataType.INTEGER) + @SkipChangeset + editorCount: number; + + // associations + + @BelongsTo(() => Document, "documentId") + document: Document; + + @ForeignKey(() => Document) + @Column(DataType.UUID) + documentId: string; + + @BelongsTo(() => Team, "teamId") + team: Team; + + @ForeignKey(() => Team) + @Column(DataType.UUID) + teamId: string; +} + +export default DocumentInsight; diff --git a/server/models/index.ts b/server/models/index.ts index af549e5426..4be4a88e16 100644 --- a/server/models/index.ts +++ b/server/models/index.ts @@ -16,6 +16,8 @@ export { default as Comment } from "./Comment"; export { default as Document } from "./Document"; +export { default as DocumentInsight } from "./DocumentInsight"; + export { default as Event } from "./Event"; export { default as ExternalGroup } from "./ExternalGroup"; diff --git a/server/presenters/documentInsight.ts b/server/presenters/documentInsight.ts new file mode 100644 index 0000000000..5ad673d204 --- /dev/null +++ b/server/presenters/documentInsight.ts @@ -0,0 +1,13 @@ +import type { DocumentInsight } from "@server/models"; + +export default function presentDocumentInsight(insight: DocumentInsight) { + return { + date: insight.date, + viewCount: insight.viewCount, + viewerCount: insight.viewerCount, + commentCount: insight.commentCount, + reactionCount: insight.reactionCount, + revisionCount: insight.revisionCount, + editorCount: insight.editorCount, + }; +} diff --git a/server/presenters/index.ts b/server/presenters/index.ts index 306aeceaff..60249226a2 100644 --- a/server/presenters/index.ts +++ b/server/presenters/index.ts @@ -5,6 +5,7 @@ import presentAvailableTeam from "./availableTeam"; import presentCollection from "./collection"; import presentComment from "./comment"; import presentDocument, { presentDocuments } from "./document"; +import presentDocumentInsight from "./documentInsight"; import presentEvent from "./event"; import presentExternalGroup from "./externalGroup"; import presentFileOperation from "./fileOperation"; @@ -42,6 +43,7 @@ export { presentComment, presentDocument, presentDocuments, + presentDocumentInsight, presentEvent, presentExternalGroup, presentFileOperation, diff --git a/server/queues/tasks/CleanupExpiredDocumentInsightsTask.test.ts b/server/queues/tasks/CleanupExpiredDocumentInsightsTask.test.ts new file mode 100644 index 0000000000..300f1ca835 --- /dev/null +++ b/server/queues/tasks/CleanupExpiredDocumentInsightsTask.test.ts @@ -0,0 +1,42 @@ +import { format, subDays } from "date-fns"; +import { DocumentInsight } from "@server/models"; +import { buildDocument, buildTeam } from "@server/test/factories"; +import CleanupExpiredDocumentInsightsTask from "./CleanupExpiredDocumentInsightsTask"; + +const daysAgo = (n: number) => subDays(new Date(), n); +const dayStr = (d: Date) => format(d, "yyyy-MM-dd"); + +describe("CleanupExpiredDocumentInsightsTask", () => { + it("deletes rows older than the retention window", async () => { + const team = await buildTeam(); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + }); + + await DocumentInsight.create({ + documentId: document.id, + teamId: team.id, + date: dayStr(daysAgo(400)), + viewCount: 1, + }); + await DocumentInsight.create({ + documentId: document.id, + teamId: team.id, + date: dayStr(daysAgo(5)), + viewCount: 1, + }); + + await new CleanupExpiredDocumentInsightsTask().perform(); + + const dates = ( + await DocumentInsight.findAll({ + where: { documentId: document.id }, + order: [["date", "ASC"]], + }) + ).map((i) => i.date); + + expect(dates).not.toContain(dayStr(daysAgo(400))); + expect(dates).toContain(dayStr(daysAgo(5))); + }); +}); diff --git a/server/queues/tasks/CleanupExpiredDocumentInsightsTask.ts b/server/queues/tasks/CleanupExpiredDocumentInsightsTask.ts new file mode 100644 index 0000000000..7a246e2604 --- /dev/null +++ b/server/queues/tasks/CleanupExpiredDocumentInsightsTask.ts @@ -0,0 +1,42 @@ +import { Op, literal } from "sequelize"; +import { Minute } from "@shared/utils/time"; +import Logger from "@server/logging/Logger"; +import { DocumentInsight } from "@server/models"; +import { TaskPriority } from "./base/BaseTask"; +import { CronTask, TaskInterval } from "./base/CronTask"; + +/** + * Number of days of rollup history to retain. + */ +const RETENTION_DAYS = 365; + +export default class CleanupExpiredDocumentInsightsTask extends CronTask { + public async perform() { + // Derive the cutoff in UTC from the database so retention isn't affected + // by the worker's local timezone. `date` is stored as a UTC DATE. + const cutoff = literal( + `(NOW() AT TIME ZONE 'UTC')::date - INTERVAL '${RETENTION_DAYS} days'` + ); + const deleted = await DocumentInsight.destroy({ + where: { date: { [Op.lt]: cutoff } }, + }); + + if (deleted > 0) { + Logger.info("task", `Deleted ${deleted} expired document_insights rows`); + } + } + + public get cron() { + return { + interval: TaskInterval.Day, + partitionWindow: 30 * Minute.ms, + }; + } + + public get options() { + return { + attempts: 1, + priority: TaskPriority.Background, + }; + } +} diff --git a/server/queues/tasks/RollupDocumentInsightsTask.test.ts b/server/queues/tasks/RollupDocumentInsightsTask.test.ts new file mode 100644 index 0000000000..4c0b72d39d --- /dev/null +++ b/server/queues/tasks/RollupDocumentInsightsTask.test.ts @@ -0,0 +1,247 @@ +import { format, subDays } from "date-fns"; +import { DocumentInsight, Event, Reaction, Revision } from "@server/models"; +import { + buildComment, + buildDocument, + buildTeam, + buildUser, +} from "@server/test/factories"; +import RollupDocumentInsightsTask from "./RollupDocumentInsightsTask"; + +const props = { + limit: 10000, + partition: { + partitionIndex: 0, + partitionCount: 1, + }, +}; + +const daysAgo = (n: number) => subDays(new Date(), n); +const dayStr = (d: Date) => format(d, "yyyy-MM-dd"); + +describe("RollupDocumentInsightsTask", () => { + let task: RollupDocumentInsightsTask; + + beforeAll(() => { + jest.setTimeout(30000); + }); + + beforeEach(() => { + task = new RollupDocumentInsightsTask(); + }); + + it("writes nothing when no source activity exists", async () => { + const team = await buildTeam(); + await buildDocument({ teamId: team.id }); + + await task.perform(props); + + const count = await DocumentInsight.count({ where: { teamId: team.id } }); + expect(count).toBe(0); + }); + + it("rolls up view events into viewCount and viewerCount", async () => { + const team = await buildTeam(); + const userA = await buildUser({ teamId: team.id }); + const userB = await buildUser({ teamId: team.id }); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + }); + + const yesterday = daysAgo(1); + await Event.create({ + name: "views.create", + teamId: team.id, + userId: userA.id, + documentId: document.id, + createdAt: yesterday, + }); + await Event.create({ + name: "views.create", + teamId: team.id, + userId: userA.id, + documentId: document.id, + createdAt: yesterday, + }); + await Event.create({ + name: "views.create", + teamId: team.id, + userId: userB.id, + documentId: document.id, + createdAt: yesterday, + }); + + await task.perform(props); + + const insight = await DocumentInsight.findOne({ + where: { documentId: document.id, date: dayStr(yesterday) }, + }); + expect(insight).toBeTruthy(); + expect(insight!.viewCount).toBe(3); + expect(insight!.viewerCount).toBe(2); + }); + + it("rolls up comments and reactions without double-counting", async () => { + const team = await buildTeam(); + const user = await buildUser({ teamId: team.id }); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + }); + + const yesterday = daysAgo(1); + // Older comment that only receives a reaction yesterday. + const olderComment = await buildComment({ + documentId: document.id, + userId: user.id, + createdAt: daysAgo(10), + }); + + // New comment yesterday. + await buildComment({ + documentId: document.id, + userId: user.id, + createdAt: yesterday, + }); + + // Reaction on the older comment, but created yesterday. + await Reaction.create( + { + emoji: "🎉", + commentId: olderComment.id, + userId: user.id, + createdAt: yesterday, + }, + { hooks: false } + ); + + await task.perform(props); + + const insight = await DocumentInsight.findOne({ + where: { documentId: document.id, date: dayStr(yesterday) }, + }); + expect(insight).toBeTruthy(); + expect(insight!.commentCount).toBe(1); + expect(insight!.reactionCount).toBe(1); + }); + + it("rolls up revisions into revisionCount and editorCount", async () => { + const team = await buildTeam(); + const userA = await buildUser({ teamId: team.id }); + const userB = await buildUser({ teamId: team.id }); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + }); + + const yesterday = daysAgo(1); + await Revision.create({ + documentId: document.id, + userId: userA.id, + title: document.title, + text: "A", + createdAt: yesterday, + }); + await Revision.create({ + documentId: document.id, + userId: userA.id, + title: document.title, + text: "B", + createdAt: yesterday, + }); + await Revision.create({ + documentId: document.id, + userId: userB.id, + title: document.title, + text: "C", + createdAt: yesterday, + }); + + await task.perform(props); + + const insight = await DocumentInsight.findOne({ + where: { documentId: document.id, date: dayStr(yesterday) }, + }); + expect(insight).toBeTruthy(); + expect(insight!.revisionCount).toBe(3); + expect(insight!.editorCount).toBe(2); + }); + + it("is idempotent when re-run for the same day", async () => { + const team = await buildTeam(); + const user = await buildUser({ teamId: team.id }); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + }); + + const yesterday = daysAgo(1); + await Event.create({ + name: "views.create", + teamId: team.id, + userId: user.id, + documentId: document.id, + createdAt: yesterday, + }); + + await task.perform(props); + await task.perform(props); + + const rows = await DocumentInsight.findAll({ + where: { documentId: document.id, date: dayStr(yesterday) }, + }); + expect(rows).toHaveLength(1); + expect(rows[0].viewCount).toBe(1); + }); + + it("excludes deleted documents", async () => { + const team = await buildTeam(); + const user = await buildUser({ teamId: team.id }); + const document = await buildDocument({ + teamId: team.id, + publishedAt: new Date(), + deletedAt: new Date(), + }); + + await Event.create({ + name: "views.create", + teamId: team.id, + userId: user.id, + documentId: document.id, + createdAt: daysAgo(1), + }); + + await task.perform(props); + + const count = await DocumentInsight.count({ + where: { documentId: document.id }, + }); + expect(count).toBe(0); + }); + + it("includes unpublished documents", async () => { + const team = await buildTeam(); + const user = await buildUser({ teamId: team.id }); + const draft = await buildDocument({ + teamId: team.id, + publishedAt: undefined, + }); + + await Revision.create({ + documentId: draft.id, + userId: user.id, + title: draft.title, + text: "x", + createdAt: daysAgo(1), + }); + + await task.perform(props); + + const insight = await DocumentInsight.findOne({ + where: { documentId: draft.id, date: dayStr(daysAgo(1)) }, + }); + expect(insight).toBeTruthy(); + expect(insight!.revisionCount).toBe(1); + }); +}); diff --git a/server/queues/tasks/RollupDocumentInsightsTask.ts b/server/queues/tasks/RollupDocumentInsightsTask.ts new file mode 100644 index 0000000000..0466ed77c0 --- /dev/null +++ b/server/queues/tasks/RollupDocumentInsightsTask.ts @@ -0,0 +1,151 @@ +import { QueryTypes } from "sequelize"; +import { Day, Minute } from "@shared/utils/time"; +import Logger from "@server/logging/Logger"; +import { sequelize } from "@server/storage/database"; +import { TaskPriority } from "./base/BaseTask"; +import type { Props } from "./base/CronTask"; +import { CronTask, TaskInterval } from "./base/CronTask"; + +/** + * Number of recent days to (re)compute on each run, in addition to the current + * day. Reprocessing the most recent days lets late-arriving writes (slow + * workers, out-of-order event emission) settle into the rollup. The upsert is + * idempotent. + */ +const RECOMPUTE_DAYS = 2; + +export default class RollupDocumentInsightsTask extends CronTask { + public async perform({ partition }: Props) { + const [startUuid, endUuid] = this.getPartitionBounds(partition); + + for (let offset = RECOMPUTE_DAYS; offset >= 0; offset--) { + const date = new Date(Date.now() - offset * Day.ms) + .toISOString() + .slice(0, 10); + await this.rollupDay(date, startUuid, endUuid); + } + } + + private async rollupDay( + date: string, + startUuid: string, + endUuid: string + ): Promise { + const [{ upserted }] = await sequelize.query<{ upserted: string }>( + ` + WITH partitioned_documents AS ( + SELECT id, "teamId" + FROM documents + WHERE "deletedAt" IS NULL + AND id >= :startUuid::uuid + AND id <= :endUuid::uuid + ), + view_counts AS ( + SELECT + e."documentId", + COUNT(*) AS view_count, + COUNT(DISTINCT e."userId") AS viewer_count + FROM events e + INNER JOIN partitioned_documents pd ON pd.id = e."documentId" + WHERE e.name = 'views.create' + AND e."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND e."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY e."documentId" + ), + comment_counts AS ( + SELECT c."documentId", COUNT(*) AS comment_count + FROM comments c + INNER JOIN partitioned_documents pd ON pd.id = c."documentId" + WHERE c."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND c."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY c."documentId" + ), + reaction_counts AS ( + SELECT c."documentId", COUNT(rx.id) AS reaction_count + FROM reactions rx + INNER JOIN comments c ON c.id = rx."commentId" + INNER JOIN partitioned_documents pd ON pd.id = c."documentId" + WHERE rx."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND rx."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY c."documentId" + ), + revision_counts AS ( + SELECT + r."documentId", + COUNT(*) AS revision_count, + COUNT(DISTINCT r."userId") AS editor_count + FROM revisions r + INNER JOIN partitioned_documents pd ON pd.id = r."documentId" + WHERE r."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND r."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY r."documentId" + ), + active AS ( + SELECT "documentId" FROM view_counts + UNION SELECT "documentId" FROM comment_counts + UNION SELECT "documentId" FROM reaction_counts + UNION SELECT "documentId" FROM revision_counts + ), + inserted AS ( + INSERT INTO document_insights ( + id, "documentId", "teamId", date, + "viewCount", "viewerCount", + "commentCount", "reactionCount", + "revisionCount", "editorCount", + "createdAt", "updatedAt" + ) + SELECT + uuid_generate_v4(), + pd.id, + pd."teamId", + :dayStart::date, + COALESCE(v.view_count, 0), + COALESCE(v.viewer_count, 0), + COALESCE(c.comment_count, 0), + COALESCE(rx.reaction_count, 0), + COALESCE(r.revision_count, 0), + COALESCE(r.editor_count, 0), + NOW(), NOW() + FROM active a + INNER JOIN partitioned_documents pd ON pd.id = a."documentId" + LEFT JOIN view_counts v ON v."documentId" = pd.id + LEFT JOIN comment_counts c ON c."documentId" = pd.id + LEFT JOIN reaction_counts rx ON rx."documentId" = pd.id + LEFT JOIN revision_counts r ON r."documentId" = pd.id + ON CONFLICT ("documentId", date) DO UPDATE SET + "viewCount" = EXCLUDED."viewCount", + "viewerCount" = EXCLUDED."viewerCount", + "commentCount" = EXCLUDED."commentCount", + "reactionCount" = EXCLUDED."reactionCount", + "revisionCount" = EXCLUDED."revisionCount", + "editorCount" = EXCLUDED."editorCount", + "updatedAt" = NOW() + RETURNING 1 + ) + SELECT COUNT(*)::text AS upserted FROM inserted + `, + { + replacements: { dayStart: date, startUuid, endUuid }, + type: QueryTypes.SELECT, + } + ); + + Logger.info("task", `Rolled up document insights for ${date}`, { + upserted: parseInt(upserted, 10), + }); + } + + public get cron() { + return { + interval: TaskInterval.Day, + partitionWindow: 30 * Minute.ms, + }; + } + + public get options() { + return { + attempts: 1, + priority: TaskPriority.Background, + }; + } +} diff --git a/server/routes/api/documents/documents.ts b/server/routes/api/documents/documents.ts index d8b9aeac2a..eeb2167bbf 100644 --- a/server/routes/api/documents/documents.ts +++ b/server/routes/api/documents/documents.ts @@ -24,6 +24,7 @@ import { } from "@shared/types"; import { subtractDate } from "@shared/utils/date"; import slugify from "@shared/utils/slugify"; +import { Day } from "@shared/utils/time"; import documentCreator from "@server/commands/documentCreator"; import documentDuplicator from "@server/commands/documentDuplicator"; import documentLoader from "@server/commands/documentLoader"; @@ -49,6 +50,7 @@ import { Relationship, Collection, Document, + DocumentInsight, Event, Revision, SearchQuery, @@ -69,6 +71,7 @@ import { TextHelper } from "@server/models/helpers/TextHelper"; import { authorize, cannot } from "@server/policies"; import { presentDocument, + presentDocumentInsight, presentDocuments, presentPolicies, presentTemplate, @@ -621,6 +624,41 @@ router.post( } ); +router.post( + "documents.insights", + auth(), + validate(T.DocumentsInsightsSchema), + async (ctx: APIContext) => { + const { id, startDate, endDate } = ctx.input.body; + const { user } = ctx.state.auth; + + const document = await Document.findByPk(id, { userId: user.id }); + authorize(user, "listViews", document); + + if (!document.insightsEnabled) { + throw ValidationError("Insights are not enabled for this document"); + } + + const end = endDate ?? new Date(); + const start = startDate ?? new Date(end.getTime() - 30 * Day.ms); + + const insights = await DocumentInsight.findAll({ + where: { + documentId: document.id, + date: { + [Op.gte]: start.toISOString().slice(0, 10), + [Op.lte]: end.toISOString().slice(0, 10), + }, + }, + order: [["date", "ASC"]], + }); + + ctx.body = { + data: insights.map(presentDocumentInsight), + }; + } +); + router.post( "documents.users", auth(), diff --git a/server/routes/api/documents/schema.ts b/server/routes/api/documents/schema.ts index d4e176d0d4..68e8f12028 100644 --- a/server/routes/api/documents/schema.ts +++ b/server/routes/api/documents/schema.ts @@ -154,6 +154,25 @@ export const DocumentsInfoSchema = BaseSchema.extend({ export type DocumentsInfoReq = z.infer; +export const DocumentsInsightsSchema = BaseSchema.extend({ + body: BaseIdSchema.extend({ + /** Start of the insights window (inclusive). Defaults to 30 days ago. */ + startDate: z.coerce.date().optional(), + /** End of the insights window (inclusive). Defaults to today. */ + endDate: z.coerce.date().optional(), + }), +}).refine( + (req) => + !req.body.startDate || + !req.body.endDate || + req.body.startDate <= req.body.endDate, + { + message: "startDate must be on or before endDate", + } +); + +export type DocumentsInsightsReq = z.infer; + export const DocumentsExportSchema = BaseSchema.extend({ body: BaseIdSchema.extend({ signedUrls: z.number().optional(), diff --git a/server/scripts/20260416000000-backfill-document-insights.ts b/server/scripts/20260416000000-backfill-document-insights.ts new file mode 100644 index 0000000000..24ec3300a7 --- /dev/null +++ b/server/scripts/20260416000000-backfill-document-insights.ts @@ -0,0 +1,129 @@ +import "./bootstrap"; +import { QueryTypes } from "sequelize"; +import { Day } from "@shared/utils/time"; +import { sequelize } from "@server/storage/database"; + +const DEFAULT_DAYS = 14; + +const days = parseInt(process.argv[2], 10); +const backfillDays = Number.isNaN(days) ? DEFAULT_DAYS : days; + +/** + * Populates document_insights with one row per (document, day) for each day + * within the backfill window that has source activity. Safe to re-run — the + * upsert keys on (documentId, date). Source ranges are half-open + * [dayStart, dayStart + 1) in UTC so events land in exactly one day. + */ +async function backfillDay(date: string): Promise { + const [{ upserted }] = await sequelize.query<{ upserted: string }>( + ` + WITH view_counts AS ( + SELECT + "documentId", + COUNT(*) AS view_count, + COUNT(DISTINCT "userId") AS viewer_count + FROM events + WHERE name = 'views.create' + AND "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY "documentId" + ), + comment_counts AS ( + SELECT "documentId", COUNT(*) AS comment_count + FROM comments + WHERE "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY "documentId" + ), + reaction_counts AS ( + SELECT c."documentId", COUNT(rx.id) AS reaction_count + FROM reactions rx + INNER JOIN comments c ON c.id = rx."commentId" + WHERE rx."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND rx."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY c."documentId" + ), + revision_counts AS ( + SELECT + "documentId", + COUNT(*) AS revision_count, + COUNT(DISTINCT "userId") AS editor_count + FROM revisions + WHERE "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC' + AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC' + GROUP BY "documentId" + ), + active AS ( + SELECT "documentId" FROM view_counts + UNION SELECT "documentId" FROM comment_counts + UNION SELECT "documentId" FROM reaction_counts + UNION SELECT "documentId" FROM revision_counts + ), + inserted AS ( + INSERT INTO document_insights ( + id, "documentId", "teamId", date, + "viewCount", "viewerCount", + "commentCount", "reactionCount", + "revisionCount", "editorCount", + "createdAt", "updatedAt" + ) + SELECT + uuid_generate_v4(), + d.id, + d."teamId", + :dayStart::date, + COALESCE(v.view_count, 0), + COALESCE(v.viewer_count, 0), + COALESCE(c.comment_count, 0), + COALESCE(rx.reaction_count, 0), + COALESCE(r.revision_count, 0), + COALESCE(r.editor_count, 0), + NOW(), NOW() + FROM active a + INNER JOIN documents d ON d.id = a."documentId" + LEFT JOIN view_counts v ON v."documentId" = d.id + LEFT JOIN comment_counts c ON c."documentId" = d.id + LEFT JOIN reaction_counts rx ON rx."documentId" = d.id + LEFT JOIN revision_counts r ON r."documentId" = d.id + WHERE d."deletedAt" IS NULL + ON CONFLICT ("documentId", date) DO UPDATE SET + "viewCount" = EXCLUDED."viewCount", + "viewerCount" = EXCLUDED."viewerCount", + "commentCount" = EXCLUDED."commentCount", + "reactionCount" = EXCLUDED."reactionCount", + "revisionCount" = EXCLUDED."revisionCount", + "editorCount" = EXCLUDED."editorCount", + "updatedAt" = NOW() + RETURNING 1 + ) + SELECT COUNT(*)::text AS upserted FROM inserted + `, + { + replacements: { dayStart: date }, + type: QueryTypes.SELECT, + } + ); + + return parseInt(upserted, 10); +} + +async function main() { + console.log(`Backfilling ${backfillDays} days of document insights…`); + + for (let offset = backfillDays; offset >= 1; offset--) { + const date = new Date(Date.now() - offset * Day.ms) + .toISOString() + .slice(0, 10); + const upserted = await backfillDay(date); + console.log(` ${date}: ${upserted} rows`); + } + + console.log("Backfill complete"); + process.exit(0); +} + +if (process.env.NODE_ENV !== "test") { + void main(); +} + +export default main; diff --git a/server/test/factories.ts b/server/test/factories.ts index 0315dd111a..8f939a993d 100644 --- a/server/test/factories.ts +++ b/server/test/factories.ts @@ -484,29 +484,35 @@ export async function buildComment(overrides: { parentCommentId?: string; resolvedById?: string; reactions?: ReactionSummary[]; + createdAt?: Date; }) { - const comment = await Comment.create({ - resolvedById: overrides.resolvedById, - parentCommentId: overrides.parentCommentId, - documentId: overrides.documentId, - data: { - type: "doc", - content: [ - { - type: "paragraph", - content: [ - { - content: [], - type: "text", - text: "test", - }, - ], - }, - ], + const comment = await Comment.create( + { + resolvedById: overrides.resolvedById, + parentCommentId: overrides.parentCommentId, + documentId: overrides.documentId, + data: { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { + content: [], + type: "text", + text: "test", + }, + ], + }, + ], + }, + createdById: overrides.userId, + reactions: overrides.reactions, + createdAt: overrides.createdAt, + updatedAt: overrides.createdAt, }, - createdById: overrides.userId, - reactions: overrides.reactions, - }); + { silent: overrides.createdAt ? true : false } + ); return comment; }