mirror of
https://github.com/outline/outline.git
synced 2026-06-13 11:25:03 +03:00
feat: Document insight rollups (#12086)
* First pass * Remove popularity changes * Address review feedback - Compute retention cutoff in UTC from the database rather than worker-local TZ - Push partition predicate into rollup source CTEs to avoid full-table scans per partition Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * Anchor insight rollups to UTC and include today Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
|||||||
|
"use strict";
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
async up(queryInterface, Sequelize) {
|
||||||
|
return queryInterface.sequelize.transaction(async (transaction) => {
|
||||||
|
await queryInterface.createTable(
|
||||||
|
"document_insights",
|
||||||
|
{
|
||||||
|
id: {
|
||||||
|
type: Sequelize.UUID,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: Sequelize.UUIDV4,
|
||||||
|
primaryKey: true,
|
||||||
|
},
|
||||||
|
documentId: {
|
||||||
|
type: Sequelize.UUID,
|
||||||
|
allowNull: false,
|
||||||
|
references: {
|
||||||
|
model: "documents",
|
||||||
|
key: "id",
|
||||||
|
},
|
||||||
|
onDelete: "CASCADE",
|
||||||
|
},
|
||||||
|
teamId: {
|
||||||
|
type: Sequelize.UUID,
|
||||||
|
allowNull: false,
|
||||||
|
references: {
|
||||||
|
model: "teams",
|
||||||
|
key: "id",
|
||||||
|
},
|
||||||
|
onDelete: "CASCADE",
|
||||||
|
},
|
||||||
|
date: {
|
||||||
|
type: Sequelize.DATEONLY,
|
||||||
|
allowNull: false,
|
||||||
|
},
|
||||||
|
viewCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
viewerCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
commentCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
reactionCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
revisionCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
editorCount: {
|
||||||
|
type: Sequelize.INTEGER,
|
||||||
|
allowNull: false,
|
||||||
|
defaultValue: 0,
|
||||||
|
},
|
||||||
|
createdAt: {
|
||||||
|
type: Sequelize.DATE,
|
||||||
|
allowNull: false,
|
||||||
|
},
|
||||||
|
updatedAt: {
|
||||||
|
type: Sequelize.DATE,
|
||||||
|
allowNull: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{ transaction }
|
||||||
|
);
|
||||||
|
|
||||||
|
await queryInterface.addIndex(
|
||||||
|
"document_insights",
|
||||||
|
["documentId", "date"],
|
||||||
|
{ unique: true, transaction }
|
||||||
|
);
|
||||||
|
|
||||||
|
await queryInterface.addIndex("document_insights", ["teamId", "date"], {
|
||||||
|
transaction,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
},
|
||||||
|
|
||||||
|
async down(queryInterface) {
|
||||||
|
return queryInterface.sequelize.transaction(async (transaction) => {
|
||||||
|
await queryInterface.dropTable("document_insights", { transaction });
|
||||||
|
});
|
||||||
|
},
|
||||||
|
};
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
import type { InferAttributes, InferCreationAttributes } from "sequelize";
|
||||||
|
import {
|
||||||
|
BelongsTo,
|
||||||
|
Column,
|
||||||
|
DataType,
|
||||||
|
Default,
|
||||||
|
ForeignKey,
|
||||||
|
Table,
|
||||||
|
} from "sequelize-typescript";
|
||||||
|
import Document from "./Document";
|
||||||
|
import Team from "./Team";
|
||||||
|
import IdModel from "./base/IdModel";
|
||||||
|
import { SkipChangeset } from "./decorators/Changeset";
|
||||||
|
import Fix from "./decorators/Fix";
|
||||||
|
|
||||||
|
@Table({ tableName: "document_insights", modelName: "documentInsight" })
|
||||||
|
@Fix
|
||||||
|
class DocumentInsight extends IdModel<
|
||||||
|
InferAttributes<DocumentInsight>,
|
||||||
|
Partial<InferCreationAttributes<DocumentInsight>>
|
||||||
|
> {
|
||||||
|
@Column(DataType.DATEONLY)
|
||||||
|
date: string;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
viewCount: number;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
viewerCount: number;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
commentCount: number;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
reactionCount: number;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
revisionCount: number;
|
||||||
|
|
||||||
|
@Default(0)
|
||||||
|
@Column(DataType.INTEGER)
|
||||||
|
@SkipChangeset
|
||||||
|
editorCount: number;
|
||||||
|
|
||||||
|
// associations
|
||||||
|
|
||||||
|
@BelongsTo(() => Document, "documentId")
|
||||||
|
document: Document;
|
||||||
|
|
||||||
|
@ForeignKey(() => Document)
|
||||||
|
@Column(DataType.UUID)
|
||||||
|
documentId: string;
|
||||||
|
|
||||||
|
@BelongsTo(() => Team, "teamId")
|
||||||
|
team: Team;
|
||||||
|
|
||||||
|
@ForeignKey(() => Team)
|
||||||
|
@Column(DataType.UUID)
|
||||||
|
teamId: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export default DocumentInsight;
|
||||||
@@ -16,6 +16,8 @@ export { default as Comment } from "./Comment";
|
|||||||
|
|
||||||
export { default as Document } from "./Document";
|
export { default as Document } from "./Document";
|
||||||
|
|
||||||
|
export { default as DocumentInsight } from "./DocumentInsight";
|
||||||
|
|
||||||
export { default as Event } from "./Event";
|
export { default as Event } from "./Event";
|
||||||
|
|
||||||
export { default as ExternalGroup } from "./ExternalGroup";
|
export { default as ExternalGroup } from "./ExternalGroup";
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
import type { DocumentInsight } from "@server/models";
|
||||||
|
|
||||||
|
export default function presentDocumentInsight(insight: DocumentInsight) {
|
||||||
|
return {
|
||||||
|
date: insight.date,
|
||||||
|
viewCount: insight.viewCount,
|
||||||
|
viewerCount: insight.viewerCount,
|
||||||
|
commentCount: insight.commentCount,
|
||||||
|
reactionCount: insight.reactionCount,
|
||||||
|
revisionCount: insight.revisionCount,
|
||||||
|
editorCount: insight.editorCount,
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -5,6 +5,7 @@ import presentAvailableTeam from "./availableTeam";
|
|||||||
import presentCollection from "./collection";
|
import presentCollection from "./collection";
|
||||||
import presentComment from "./comment";
|
import presentComment from "./comment";
|
||||||
import presentDocument, { presentDocuments } from "./document";
|
import presentDocument, { presentDocuments } from "./document";
|
||||||
|
import presentDocumentInsight from "./documentInsight";
|
||||||
import presentEvent from "./event";
|
import presentEvent from "./event";
|
||||||
import presentExternalGroup from "./externalGroup";
|
import presentExternalGroup from "./externalGroup";
|
||||||
import presentFileOperation from "./fileOperation";
|
import presentFileOperation from "./fileOperation";
|
||||||
@@ -42,6 +43,7 @@ export {
|
|||||||
presentComment,
|
presentComment,
|
||||||
presentDocument,
|
presentDocument,
|
||||||
presentDocuments,
|
presentDocuments,
|
||||||
|
presentDocumentInsight,
|
||||||
presentEvent,
|
presentEvent,
|
||||||
presentExternalGroup,
|
presentExternalGroup,
|
||||||
presentFileOperation,
|
presentFileOperation,
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
import { format, subDays } from "date-fns";
|
||||||
|
import { DocumentInsight } from "@server/models";
|
||||||
|
import { buildDocument, buildTeam } from "@server/test/factories";
|
||||||
|
import CleanupExpiredDocumentInsightsTask from "./CleanupExpiredDocumentInsightsTask";
|
||||||
|
|
||||||
|
const daysAgo = (n: number) => subDays(new Date(), n);
|
||||||
|
const dayStr = (d: Date) => format(d, "yyyy-MM-dd");
|
||||||
|
|
||||||
|
describe("CleanupExpiredDocumentInsightsTask", () => {
|
||||||
|
it("deletes rows older than the retention window", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
await DocumentInsight.create({
|
||||||
|
documentId: document.id,
|
||||||
|
teamId: team.id,
|
||||||
|
date: dayStr(daysAgo(400)),
|
||||||
|
viewCount: 1,
|
||||||
|
});
|
||||||
|
await DocumentInsight.create({
|
||||||
|
documentId: document.id,
|
||||||
|
teamId: team.id,
|
||||||
|
date: dayStr(daysAgo(5)),
|
||||||
|
viewCount: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
await new CleanupExpiredDocumentInsightsTask().perform();
|
||||||
|
|
||||||
|
const dates = (
|
||||||
|
await DocumentInsight.findAll({
|
||||||
|
where: { documentId: document.id },
|
||||||
|
order: [["date", "ASC"]],
|
||||||
|
})
|
||||||
|
).map((i) => i.date);
|
||||||
|
|
||||||
|
expect(dates).not.toContain(dayStr(daysAgo(400)));
|
||||||
|
expect(dates).toContain(dayStr(daysAgo(5)));
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
import { Op, literal } from "sequelize";
|
||||||
|
import { Minute } from "@shared/utils/time";
|
||||||
|
import Logger from "@server/logging/Logger";
|
||||||
|
import { DocumentInsight } from "@server/models";
|
||||||
|
import { TaskPriority } from "./base/BaseTask";
|
||||||
|
import { CronTask, TaskInterval } from "./base/CronTask";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of days of rollup history to retain.
|
||||||
|
*/
|
||||||
|
const RETENTION_DAYS = 365;
|
||||||
|
|
||||||
|
export default class CleanupExpiredDocumentInsightsTask extends CronTask {
|
||||||
|
public async perform() {
|
||||||
|
// Derive the cutoff in UTC from the database so retention isn't affected
|
||||||
|
// by the worker's local timezone. `date` is stored as a UTC DATE.
|
||||||
|
const cutoff = literal(
|
||||||
|
`(NOW() AT TIME ZONE 'UTC')::date - INTERVAL '${RETENTION_DAYS} days'`
|
||||||
|
);
|
||||||
|
const deleted = await DocumentInsight.destroy({
|
||||||
|
where: { date: { [Op.lt]: cutoff } },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (deleted > 0) {
|
||||||
|
Logger.info("task", `Deleted ${deleted} expired document_insights rows`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public get cron() {
|
||||||
|
return {
|
||||||
|
interval: TaskInterval.Day,
|
||||||
|
partitionWindow: 30 * Minute.ms,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public get options() {
|
||||||
|
return {
|
||||||
|
attempts: 1,
|
||||||
|
priority: TaskPriority.Background,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,247 @@
|
|||||||
|
import { format, subDays } from "date-fns";
|
||||||
|
import { DocumentInsight, Event, Reaction, Revision } from "@server/models";
|
||||||
|
import {
|
||||||
|
buildComment,
|
||||||
|
buildDocument,
|
||||||
|
buildTeam,
|
||||||
|
buildUser,
|
||||||
|
} from "@server/test/factories";
|
||||||
|
import RollupDocumentInsightsTask from "./RollupDocumentInsightsTask";
|
||||||
|
|
||||||
|
const props = {
|
||||||
|
limit: 10000,
|
||||||
|
partition: {
|
||||||
|
partitionIndex: 0,
|
||||||
|
partitionCount: 1,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const daysAgo = (n: number) => subDays(new Date(), n);
|
||||||
|
const dayStr = (d: Date) => format(d, "yyyy-MM-dd");
|
||||||
|
|
||||||
|
describe("RollupDocumentInsightsTask", () => {
|
||||||
|
let task: RollupDocumentInsightsTask;
|
||||||
|
|
||||||
|
beforeAll(() => {
|
||||||
|
jest.setTimeout(30000);
|
||||||
|
});
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
task = new RollupDocumentInsightsTask();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("writes nothing when no source activity exists", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
await buildDocument({ teamId: team.id });
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const count = await DocumentInsight.count({ where: { teamId: team.id } });
|
||||||
|
expect(count).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rolls up view events into viewCount and viewerCount", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const userA = await buildUser({ teamId: team.id });
|
||||||
|
const userB = await buildUser({ teamId: team.id });
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const yesterday = daysAgo(1);
|
||||||
|
await Event.create({
|
||||||
|
name: "views.create",
|
||||||
|
teamId: team.id,
|
||||||
|
userId: userA.id,
|
||||||
|
documentId: document.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
await Event.create({
|
||||||
|
name: "views.create",
|
||||||
|
teamId: team.id,
|
||||||
|
userId: userA.id,
|
||||||
|
documentId: document.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
await Event.create({
|
||||||
|
name: "views.create",
|
||||||
|
teamId: team.id,
|
||||||
|
userId: userB.id,
|
||||||
|
documentId: document.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const insight = await DocumentInsight.findOne({
|
||||||
|
where: { documentId: document.id, date: dayStr(yesterday) },
|
||||||
|
});
|
||||||
|
expect(insight).toBeTruthy();
|
||||||
|
expect(insight!.viewCount).toBe(3);
|
||||||
|
expect(insight!.viewerCount).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rolls up comments and reactions without double-counting", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const user = await buildUser({ teamId: team.id });
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const yesterday = daysAgo(1);
|
||||||
|
// Older comment that only receives a reaction yesterday.
|
||||||
|
const olderComment = await buildComment({
|
||||||
|
documentId: document.id,
|
||||||
|
userId: user.id,
|
||||||
|
createdAt: daysAgo(10),
|
||||||
|
});
|
||||||
|
|
||||||
|
// New comment yesterday.
|
||||||
|
await buildComment({
|
||||||
|
documentId: document.id,
|
||||||
|
userId: user.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Reaction on the older comment, but created yesterday.
|
||||||
|
await Reaction.create(
|
||||||
|
{
|
||||||
|
emoji: "🎉",
|
||||||
|
commentId: olderComment.id,
|
||||||
|
userId: user.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
},
|
||||||
|
{ hooks: false }
|
||||||
|
);
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const insight = await DocumentInsight.findOne({
|
||||||
|
where: { documentId: document.id, date: dayStr(yesterday) },
|
||||||
|
});
|
||||||
|
expect(insight).toBeTruthy();
|
||||||
|
expect(insight!.commentCount).toBe(1);
|
||||||
|
expect(insight!.reactionCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rolls up revisions into revisionCount and editorCount", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const userA = await buildUser({ teamId: team.id });
|
||||||
|
const userB = await buildUser({ teamId: team.id });
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const yesterday = daysAgo(1);
|
||||||
|
await Revision.create({
|
||||||
|
documentId: document.id,
|
||||||
|
userId: userA.id,
|
||||||
|
title: document.title,
|
||||||
|
text: "A",
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
await Revision.create({
|
||||||
|
documentId: document.id,
|
||||||
|
userId: userA.id,
|
||||||
|
title: document.title,
|
||||||
|
text: "B",
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
await Revision.create({
|
||||||
|
documentId: document.id,
|
||||||
|
userId: userB.id,
|
||||||
|
title: document.title,
|
||||||
|
text: "C",
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const insight = await DocumentInsight.findOne({
|
||||||
|
where: { documentId: document.id, date: dayStr(yesterday) },
|
||||||
|
});
|
||||||
|
expect(insight).toBeTruthy();
|
||||||
|
expect(insight!.revisionCount).toBe(3);
|
||||||
|
expect(insight!.editorCount).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("is idempotent when re-run for the same day", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const user = await buildUser({ teamId: team.id });
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const yesterday = daysAgo(1);
|
||||||
|
await Event.create({
|
||||||
|
name: "views.create",
|
||||||
|
teamId: team.id,
|
||||||
|
userId: user.id,
|
||||||
|
documentId: document.id,
|
||||||
|
createdAt: yesterday,
|
||||||
|
});
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const rows = await DocumentInsight.findAll({
|
||||||
|
where: { documentId: document.id, date: dayStr(yesterday) },
|
||||||
|
});
|
||||||
|
expect(rows).toHaveLength(1);
|
||||||
|
expect(rows[0].viewCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("excludes deleted documents", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const user = await buildUser({ teamId: team.id });
|
||||||
|
const document = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: new Date(),
|
||||||
|
deletedAt: new Date(),
|
||||||
|
});
|
||||||
|
|
||||||
|
await Event.create({
|
||||||
|
name: "views.create",
|
||||||
|
teamId: team.id,
|
||||||
|
userId: user.id,
|
||||||
|
documentId: document.id,
|
||||||
|
createdAt: daysAgo(1),
|
||||||
|
});
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const count = await DocumentInsight.count({
|
||||||
|
where: { documentId: document.id },
|
||||||
|
});
|
||||||
|
expect(count).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("includes unpublished documents", async () => {
|
||||||
|
const team = await buildTeam();
|
||||||
|
const user = await buildUser({ teamId: team.id });
|
||||||
|
const draft = await buildDocument({
|
||||||
|
teamId: team.id,
|
||||||
|
publishedAt: undefined,
|
||||||
|
});
|
||||||
|
|
||||||
|
await Revision.create({
|
||||||
|
documentId: draft.id,
|
||||||
|
userId: user.id,
|
||||||
|
title: draft.title,
|
||||||
|
text: "x",
|
||||||
|
createdAt: daysAgo(1),
|
||||||
|
});
|
||||||
|
|
||||||
|
await task.perform(props);
|
||||||
|
|
||||||
|
const insight = await DocumentInsight.findOne({
|
||||||
|
where: { documentId: draft.id, date: dayStr(daysAgo(1)) },
|
||||||
|
});
|
||||||
|
expect(insight).toBeTruthy();
|
||||||
|
expect(insight!.revisionCount).toBe(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
import { QueryTypes } from "sequelize";
|
||||||
|
import { Day, Minute } from "@shared/utils/time";
|
||||||
|
import Logger from "@server/logging/Logger";
|
||||||
|
import { sequelize } from "@server/storage/database";
|
||||||
|
import { TaskPriority } from "./base/BaseTask";
|
||||||
|
import type { Props } from "./base/CronTask";
|
||||||
|
import { CronTask, TaskInterval } from "./base/CronTask";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of recent days to (re)compute on each run, in addition to the current
|
||||||
|
* day. Reprocessing the most recent days lets late-arriving writes (slow
|
||||||
|
* workers, out-of-order event emission) settle into the rollup. The upsert is
|
||||||
|
* idempotent.
|
||||||
|
*/
|
||||||
|
const RECOMPUTE_DAYS = 2;
|
||||||
|
|
||||||
|
export default class RollupDocumentInsightsTask extends CronTask {
|
||||||
|
public async perform({ partition }: Props) {
|
||||||
|
const [startUuid, endUuid] = this.getPartitionBounds(partition);
|
||||||
|
|
||||||
|
for (let offset = RECOMPUTE_DAYS; offset >= 0; offset--) {
|
||||||
|
const date = new Date(Date.now() - offset * Day.ms)
|
||||||
|
.toISOString()
|
||||||
|
.slice(0, 10);
|
||||||
|
await this.rollupDay(date, startUuid, endUuid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async rollupDay(
|
||||||
|
date: string,
|
||||||
|
startUuid: string,
|
||||||
|
endUuid: string
|
||||||
|
): Promise<void> {
|
||||||
|
const [{ upserted }] = await sequelize.query<{ upserted: string }>(
|
||||||
|
`
|
||||||
|
WITH partitioned_documents AS (
|
||||||
|
SELECT id, "teamId"
|
||||||
|
FROM documents
|
||||||
|
WHERE "deletedAt" IS NULL
|
||||||
|
AND id >= :startUuid::uuid
|
||||||
|
AND id <= :endUuid::uuid
|
||||||
|
),
|
||||||
|
view_counts AS (
|
||||||
|
SELECT
|
||||||
|
e."documentId",
|
||||||
|
COUNT(*) AS view_count,
|
||||||
|
COUNT(DISTINCT e."userId") AS viewer_count
|
||||||
|
FROM events e
|
||||||
|
INNER JOIN partitioned_documents pd ON pd.id = e."documentId"
|
||||||
|
WHERE e.name = 'views.create'
|
||||||
|
AND e."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND e."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY e."documentId"
|
||||||
|
),
|
||||||
|
comment_counts AS (
|
||||||
|
SELECT c."documentId", COUNT(*) AS comment_count
|
||||||
|
FROM comments c
|
||||||
|
INNER JOIN partitioned_documents pd ON pd.id = c."documentId"
|
||||||
|
WHERE c."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND c."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY c."documentId"
|
||||||
|
),
|
||||||
|
reaction_counts AS (
|
||||||
|
SELECT c."documentId", COUNT(rx.id) AS reaction_count
|
||||||
|
FROM reactions rx
|
||||||
|
INNER JOIN comments c ON c.id = rx."commentId"
|
||||||
|
INNER JOIN partitioned_documents pd ON pd.id = c."documentId"
|
||||||
|
WHERE rx."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND rx."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY c."documentId"
|
||||||
|
),
|
||||||
|
revision_counts AS (
|
||||||
|
SELECT
|
||||||
|
r."documentId",
|
||||||
|
COUNT(*) AS revision_count,
|
||||||
|
COUNT(DISTINCT r."userId") AS editor_count
|
||||||
|
FROM revisions r
|
||||||
|
INNER JOIN partitioned_documents pd ON pd.id = r."documentId"
|
||||||
|
WHERE r."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND r."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY r."documentId"
|
||||||
|
),
|
||||||
|
active AS (
|
||||||
|
SELECT "documentId" FROM view_counts
|
||||||
|
UNION SELECT "documentId" FROM comment_counts
|
||||||
|
UNION SELECT "documentId" FROM reaction_counts
|
||||||
|
UNION SELECT "documentId" FROM revision_counts
|
||||||
|
),
|
||||||
|
inserted AS (
|
||||||
|
INSERT INTO document_insights (
|
||||||
|
id, "documentId", "teamId", date,
|
||||||
|
"viewCount", "viewerCount",
|
||||||
|
"commentCount", "reactionCount",
|
||||||
|
"revisionCount", "editorCount",
|
||||||
|
"createdAt", "updatedAt"
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
uuid_generate_v4(),
|
||||||
|
pd.id,
|
||||||
|
pd."teamId",
|
||||||
|
:dayStart::date,
|
||||||
|
COALESCE(v.view_count, 0),
|
||||||
|
COALESCE(v.viewer_count, 0),
|
||||||
|
COALESCE(c.comment_count, 0),
|
||||||
|
COALESCE(rx.reaction_count, 0),
|
||||||
|
COALESCE(r.revision_count, 0),
|
||||||
|
COALESCE(r.editor_count, 0),
|
||||||
|
NOW(), NOW()
|
||||||
|
FROM active a
|
||||||
|
INNER JOIN partitioned_documents pd ON pd.id = a."documentId"
|
||||||
|
LEFT JOIN view_counts v ON v."documentId" = pd.id
|
||||||
|
LEFT JOIN comment_counts c ON c."documentId" = pd.id
|
||||||
|
LEFT JOIN reaction_counts rx ON rx."documentId" = pd.id
|
||||||
|
LEFT JOIN revision_counts r ON r."documentId" = pd.id
|
||||||
|
ON CONFLICT ("documentId", date) DO UPDATE SET
|
||||||
|
"viewCount" = EXCLUDED."viewCount",
|
||||||
|
"viewerCount" = EXCLUDED."viewerCount",
|
||||||
|
"commentCount" = EXCLUDED."commentCount",
|
||||||
|
"reactionCount" = EXCLUDED."reactionCount",
|
||||||
|
"revisionCount" = EXCLUDED."revisionCount",
|
||||||
|
"editorCount" = EXCLUDED."editorCount",
|
||||||
|
"updatedAt" = NOW()
|
||||||
|
RETURNING 1
|
||||||
|
)
|
||||||
|
SELECT COUNT(*)::text AS upserted FROM inserted
|
||||||
|
`,
|
||||||
|
{
|
||||||
|
replacements: { dayStart: date, startUuid, endUuid },
|
||||||
|
type: QueryTypes.SELECT,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
Logger.info("task", `Rolled up document insights for ${date}`, {
|
||||||
|
upserted: parseInt(upserted, 10),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public get cron() {
|
||||||
|
return {
|
||||||
|
interval: TaskInterval.Day,
|
||||||
|
partitionWindow: 30 * Minute.ms,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public get options() {
|
||||||
|
return {
|
||||||
|
attempts: 1,
|
||||||
|
priority: TaskPriority.Background,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -24,6 +24,7 @@ import {
|
|||||||
} from "@shared/types";
|
} from "@shared/types";
|
||||||
import { subtractDate } from "@shared/utils/date";
|
import { subtractDate } from "@shared/utils/date";
|
||||||
import slugify from "@shared/utils/slugify";
|
import slugify from "@shared/utils/slugify";
|
||||||
|
import { Day } from "@shared/utils/time";
|
||||||
import documentCreator from "@server/commands/documentCreator";
|
import documentCreator from "@server/commands/documentCreator";
|
||||||
import documentDuplicator from "@server/commands/documentDuplicator";
|
import documentDuplicator from "@server/commands/documentDuplicator";
|
||||||
import documentLoader from "@server/commands/documentLoader";
|
import documentLoader from "@server/commands/documentLoader";
|
||||||
@@ -49,6 +50,7 @@ import {
|
|||||||
Relationship,
|
Relationship,
|
||||||
Collection,
|
Collection,
|
||||||
Document,
|
Document,
|
||||||
|
DocumentInsight,
|
||||||
Event,
|
Event,
|
||||||
Revision,
|
Revision,
|
||||||
SearchQuery,
|
SearchQuery,
|
||||||
@@ -69,6 +71,7 @@ import { TextHelper } from "@server/models/helpers/TextHelper";
|
|||||||
import { authorize, cannot } from "@server/policies";
|
import { authorize, cannot } from "@server/policies";
|
||||||
import {
|
import {
|
||||||
presentDocument,
|
presentDocument,
|
||||||
|
presentDocumentInsight,
|
||||||
presentDocuments,
|
presentDocuments,
|
||||||
presentPolicies,
|
presentPolicies,
|
||||||
presentTemplate,
|
presentTemplate,
|
||||||
@@ -621,6 +624,41 @@ router.post(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
router.post(
|
||||||
|
"documents.insights",
|
||||||
|
auth(),
|
||||||
|
validate(T.DocumentsInsightsSchema),
|
||||||
|
async (ctx: APIContext<T.DocumentsInsightsReq>) => {
|
||||||
|
const { id, startDate, endDate } = ctx.input.body;
|
||||||
|
const { user } = ctx.state.auth;
|
||||||
|
|
||||||
|
const document = await Document.findByPk(id, { userId: user.id });
|
||||||
|
authorize(user, "listViews", document);
|
||||||
|
|
||||||
|
if (!document.insightsEnabled) {
|
||||||
|
throw ValidationError("Insights are not enabled for this document");
|
||||||
|
}
|
||||||
|
|
||||||
|
const end = endDate ?? new Date();
|
||||||
|
const start = startDate ?? new Date(end.getTime() - 30 * Day.ms);
|
||||||
|
|
||||||
|
const insights = await DocumentInsight.findAll({
|
||||||
|
where: {
|
||||||
|
documentId: document.id,
|
||||||
|
date: {
|
||||||
|
[Op.gte]: start.toISOString().slice(0, 10),
|
||||||
|
[Op.lte]: end.toISOString().slice(0, 10),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
order: [["date", "ASC"]],
|
||||||
|
});
|
||||||
|
|
||||||
|
ctx.body = {
|
||||||
|
data: insights.map(presentDocumentInsight),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
router.post(
|
router.post(
|
||||||
"documents.users",
|
"documents.users",
|
||||||
auth(),
|
auth(),
|
||||||
|
|||||||
@@ -154,6 +154,25 @@ export const DocumentsInfoSchema = BaseSchema.extend({
|
|||||||
|
|
||||||
export type DocumentsInfoReq = z.infer<typeof DocumentsInfoSchema>;
|
export type DocumentsInfoReq = z.infer<typeof DocumentsInfoSchema>;
|
||||||
|
|
||||||
|
export const DocumentsInsightsSchema = BaseSchema.extend({
|
||||||
|
body: BaseIdSchema.extend({
|
||||||
|
/** Start of the insights window (inclusive). Defaults to 30 days ago. */
|
||||||
|
startDate: z.coerce.date().optional(),
|
||||||
|
/** End of the insights window (inclusive). Defaults to today. */
|
||||||
|
endDate: z.coerce.date().optional(),
|
||||||
|
}),
|
||||||
|
}).refine(
|
||||||
|
(req) =>
|
||||||
|
!req.body.startDate ||
|
||||||
|
!req.body.endDate ||
|
||||||
|
req.body.startDate <= req.body.endDate,
|
||||||
|
{
|
||||||
|
message: "startDate must be on or before endDate",
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
export type DocumentsInsightsReq = z.infer<typeof DocumentsInsightsSchema>;
|
||||||
|
|
||||||
export const DocumentsExportSchema = BaseSchema.extend({
|
export const DocumentsExportSchema = BaseSchema.extend({
|
||||||
body: BaseIdSchema.extend({
|
body: BaseIdSchema.extend({
|
||||||
signedUrls: z.number().optional(),
|
signedUrls: z.number().optional(),
|
||||||
|
|||||||
@@ -0,0 +1,129 @@
|
|||||||
|
import "./bootstrap";
|
||||||
|
import { QueryTypes } from "sequelize";
|
||||||
|
import { Day } from "@shared/utils/time";
|
||||||
|
import { sequelize } from "@server/storage/database";
|
||||||
|
|
||||||
|
const DEFAULT_DAYS = 14;
|
||||||
|
|
||||||
|
const days = parseInt(process.argv[2], 10);
|
||||||
|
const backfillDays = Number.isNaN(days) ? DEFAULT_DAYS : days;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populates document_insights with one row per (document, day) for each day
|
||||||
|
* within the backfill window that has source activity. Safe to re-run — the
|
||||||
|
* upsert keys on (documentId, date). Source ranges are half-open
|
||||||
|
* [dayStart, dayStart + 1) in UTC so events land in exactly one day.
|
||||||
|
*/
|
||||||
|
async function backfillDay(date: string): Promise<number> {
|
||||||
|
const [{ upserted }] = await sequelize.query<{ upserted: string }>(
|
||||||
|
`
|
||||||
|
WITH view_counts AS (
|
||||||
|
SELECT
|
||||||
|
"documentId",
|
||||||
|
COUNT(*) AS view_count,
|
||||||
|
COUNT(DISTINCT "userId") AS viewer_count
|
||||||
|
FROM events
|
||||||
|
WHERE name = 'views.create'
|
||||||
|
AND "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY "documentId"
|
||||||
|
),
|
||||||
|
comment_counts AS (
|
||||||
|
SELECT "documentId", COUNT(*) AS comment_count
|
||||||
|
FROM comments
|
||||||
|
WHERE "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY "documentId"
|
||||||
|
),
|
||||||
|
reaction_counts AS (
|
||||||
|
SELECT c."documentId", COUNT(rx.id) AS reaction_count
|
||||||
|
FROM reactions rx
|
||||||
|
INNER JOIN comments c ON c.id = rx."commentId"
|
||||||
|
WHERE rx."createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND rx."createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY c."documentId"
|
||||||
|
),
|
||||||
|
revision_counts AS (
|
||||||
|
SELECT
|
||||||
|
"documentId",
|
||||||
|
COUNT(*) AS revision_count,
|
||||||
|
COUNT(DISTINCT "userId") AS editor_count
|
||||||
|
FROM revisions
|
||||||
|
WHERE "createdAt" >= :dayStart::timestamp AT TIME ZONE 'UTC'
|
||||||
|
AND "createdAt" < (:dayStart::timestamp + INTERVAL '1 day') AT TIME ZONE 'UTC'
|
||||||
|
GROUP BY "documentId"
|
||||||
|
),
|
||||||
|
active AS (
|
||||||
|
SELECT "documentId" FROM view_counts
|
||||||
|
UNION SELECT "documentId" FROM comment_counts
|
||||||
|
UNION SELECT "documentId" FROM reaction_counts
|
||||||
|
UNION SELECT "documentId" FROM revision_counts
|
||||||
|
),
|
||||||
|
inserted AS (
|
||||||
|
INSERT INTO document_insights (
|
||||||
|
id, "documentId", "teamId", date,
|
||||||
|
"viewCount", "viewerCount",
|
||||||
|
"commentCount", "reactionCount",
|
||||||
|
"revisionCount", "editorCount",
|
||||||
|
"createdAt", "updatedAt"
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
uuid_generate_v4(),
|
||||||
|
d.id,
|
||||||
|
d."teamId",
|
||||||
|
:dayStart::date,
|
||||||
|
COALESCE(v.view_count, 0),
|
||||||
|
COALESCE(v.viewer_count, 0),
|
||||||
|
COALESCE(c.comment_count, 0),
|
||||||
|
COALESCE(rx.reaction_count, 0),
|
||||||
|
COALESCE(r.revision_count, 0),
|
||||||
|
COALESCE(r.editor_count, 0),
|
||||||
|
NOW(), NOW()
|
||||||
|
FROM active a
|
||||||
|
INNER JOIN documents d ON d.id = a."documentId"
|
||||||
|
LEFT JOIN view_counts v ON v."documentId" = d.id
|
||||||
|
LEFT JOIN comment_counts c ON c."documentId" = d.id
|
||||||
|
LEFT JOIN reaction_counts rx ON rx."documentId" = d.id
|
||||||
|
LEFT JOIN revision_counts r ON r."documentId" = d.id
|
||||||
|
WHERE d."deletedAt" IS NULL
|
||||||
|
ON CONFLICT ("documentId", date) DO UPDATE SET
|
||||||
|
"viewCount" = EXCLUDED."viewCount",
|
||||||
|
"viewerCount" = EXCLUDED."viewerCount",
|
||||||
|
"commentCount" = EXCLUDED."commentCount",
|
||||||
|
"reactionCount" = EXCLUDED."reactionCount",
|
||||||
|
"revisionCount" = EXCLUDED."revisionCount",
|
||||||
|
"editorCount" = EXCLUDED."editorCount",
|
||||||
|
"updatedAt" = NOW()
|
||||||
|
RETURNING 1
|
||||||
|
)
|
||||||
|
SELECT COUNT(*)::text AS upserted FROM inserted
|
||||||
|
`,
|
||||||
|
{
|
||||||
|
replacements: { dayStart: date },
|
||||||
|
type: QueryTypes.SELECT,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
return parseInt(upserted, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log(`Backfilling ${backfillDays} days of document insights…`);
|
||||||
|
|
||||||
|
for (let offset = backfillDays; offset >= 1; offset--) {
|
||||||
|
const date = new Date(Date.now() - offset * Day.ms)
|
||||||
|
.toISOString()
|
||||||
|
.slice(0, 10);
|
||||||
|
const upserted = await backfillDay(date);
|
||||||
|
console.log(` ${date}: ${upserted} rows`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("Backfill complete");
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (process.env.NODE_ENV !== "test") {
|
||||||
|
void main();
|
||||||
|
}
|
||||||
|
|
||||||
|
export default main;
|
||||||
+27
-21
@@ -484,29 +484,35 @@ export async function buildComment(overrides: {
|
|||||||
parentCommentId?: string;
|
parentCommentId?: string;
|
||||||
resolvedById?: string;
|
resolvedById?: string;
|
||||||
reactions?: ReactionSummary[];
|
reactions?: ReactionSummary[];
|
||||||
|
createdAt?: Date;
|
||||||
}) {
|
}) {
|
||||||
const comment = await Comment.create({
|
const comment = await Comment.create(
|
||||||
resolvedById: overrides.resolvedById,
|
{
|
||||||
parentCommentId: overrides.parentCommentId,
|
resolvedById: overrides.resolvedById,
|
||||||
documentId: overrides.documentId,
|
parentCommentId: overrides.parentCommentId,
|
||||||
data: {
|
documentId: overrides.documentId,
|
||||||
type: "doc",
|
data: {
|
||||||
content: [
|
type: "doc",
|
||||||
{
|
content: [
|
||||||
type: "paragraph",
|
{
|
||||||
content: [
|
type: "paragraph",
|
||||||
{
|
content: [
|
||||||
content: [],
|
{
|
||||||
type: "text",
|
content: [],
|
||||||
text: "test",
|
type: "text",
|
||||||
},
|
text: "test",
|
||||||
],
|
},
|
||||||
},
|
],
|
||||||
],
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
createdById: overrides.userId,
|
||||||
|
reactions: overrides.reactions,
|
||||||
|
createdAt: overrides.createdAt,
|
||||||
|
updatedAt: overrides.createdAt,
|
||||||
},
|
},
|
||||||
createdById: overrides.userId,
|
{ silent: overrides.createdAt ? true : false }
|
||||||
reactions: overrides.reactions,
|
);
|
||||||
});
|
|
||||||
|
|
||||||
return comment;
|
return comment;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user