mirror of
https://github.com/outline/outline.git
synced 2026-06-13 11:25:03 +03:00
1f097b0fdd
* chore: resolve no-explicit-any lint warnings in plugins Replaces uses of `any` in the plugins directory with concrete types, `unknown`, or structured type assertions, addressing the remaining typescript-eslint(no-explicit-any) warnings flagged by oxlint. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * chore: address review feedback in GitLabIssueProvider Drop trailing semicolon from log string and add early return in `destroyNamespace` when neither `user_id` nor `full_path` is present to avoid an unnecessary full-scan transaction. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
892 lines
22 KiB
TypeScript
892 lines
22 KiB
TypeScript
import invariant from "invariant";
|
|
import escapeRegExp from "lodash/escapeRegExp";
|
|
import find from "lodash/find";
|
|
import map from "lodash/map";
|
|
import queryParser from "pg-tsquery";
|
|
import type {
|
|
BindOrReplacements,
|
|
FindAttributeOptions,
|
|
FindOptions,
|
|
Order,
|
|
WhereOptions,
|
|
} from "sequelize";
|
|
import { Op, Sequelize } from "sequelize";
|
|
import type { SearchableModel } from "@shared/types";
|
|
import { DirectionFilter, SortFilter, StatusFilter } from "@shared/types";
|
|
import { regexIndexOf, regexLastIndexOf } from "@shared/utils/string";
|
|
import { getUrls } from "@shared/utils/urls";
|
|
import { ValidationError } from "@server/errors";
|
|
import Collection from "@server/models/Collection";
|
|
import type Comment from "@server/models/Comment";
|
|
import Document from "@server/models/Document";
|
|
import Team from "@server/models/Team";
|
|
import User from "@server/models/User";
|
|
import { DocumentHelper } from "@server/models/helpers/DocumentHelper";
|
|
import { sequelize } from "@server/storage/database";
|
|
import type {
|
|
SearchOptions,
|
|
SearchResponse,
|
|
} from "@server/utils/BaseSearchProvider";
|
|
import { BaseSearchProvider } from "@server/utils/BaseSearchProvider";
|
|
|
|
/**
 * Shape of the rows returned by the ranking queries in this file: a Document
 * whose `dataValues` additionally carries the computed `searchRanking`
 * attribute selected alongside the id.
 */
type RankedDocument = Document & {
  dataValues: Partial<Document> & { searchRanking: number };
  id: string;
};
|
|
|
|
/**
|
|
* Search provider that uses PostgreSQL full-text search via tsvector.
|
|
* Indexing is handled by database triggers, so index/remove/updateMetadata
|
|
* are no-ops.
|
|
*/
|
|
export default class PostgresSearchProvider extends BaseSearchProvider {
|
|
id = "postgres";
|
|
|
|
/**
|
|
* The maximum length of a search query.
|
|
*/
|
|
public static maxQueryLength = 1000;
|
|
|
|
/**
|
|
* Cached regex pattern for single quotes to avoid recompilation.
|
|
*/
|
|
private static readonly SINGLE_QUOTE_REGEX = /'+/g;
|
|
|
|
/**
|
|
* Cached regex pattern for quoted queries.
|
|
*/
|
|
private static readonly QUOTED_QUERY_REGEX = /"([^"]*)"/g;
|
|
|
|
/**
|
|
* Cached regex pattern for break characters.
|
|
*/
|
|
private static readonly BREAK_CHARS_REGEX = new RegExp(
|
|
`[ .,"'\n。!?!?…]`,
|
|
"g"
|
|
);
|
|
|
|
/**
|
|
* Cached stop words set for efficient lookup.
|
|
* Based on: https://github.com/postgres/postgres/blob/fc0d0ce978752493868496be6558fa17b7c4c3cf/src/backend/snowball/stopwords/english.stop
|
|
*/
|
|
private static readonly STOP_WORDS = new Set([
|
|
"i",
|
|
"me",
|
|
"my",
|
|
"myself",
|
|
"we",
|
|
"our",
|
|
"ours",
|
|
"ourselves",
|
|
"you",
|
|
"your",
|
|
"yours",
|
|
"yourself",
|
|
"yourselves",
|
|
"he",
|
|
"him",
|
|
"his",
|
|
"himself",
|
|
"she",
|
|
"her",
|
|
"hers",
|
|
"herself",
|
|
"it",
|
|
"its",
|
|
"itself",
|
|
"they",
|
|
"them",
|
|
"their",
|
|
"theirs",
|
|
"themselves",
|
|
"what",
|
|
"which",
|
|
"who",
|
|
"whom",
|
|
"this",
|
|
"that",
|
|
"these",
|
|
"those",
|
|
"am",
|
|
"is",
|
|
"are",
|
|
"was",
|
|
"were",
|
|
"be",
|
|
"been",
|
|
"being",
|
|
"have",
|
|
"has",
|
|
"had",
|
|
"having",
|
|
"do",
|
|
"does",
|
|
"did",
|
|
"doing",
|
|
"a",
|
|
"an",
|
|
"the",
|
|
"and",
|
|
"but",
|
|
"if",
|
|
"or",
|
|
"because",
|
|
"as",
|
|
"until",
|
|
"of",
|
|
"at",
|
|
"by",
|
|
"for",
|
|
"with",
|
|
"about",
|
|
"against",
|
|
"into",
|
|
"through",
|
|
"during",
|
|
"before",
|
|
"after",
|
|
"above",
|
|
"below",
|
|
"from",
|
|
"down",
|
|
"off",
|
|
"over",
|
|
"under",
|
|
"again",
|
|
"then",
|
|
"once",
|
|
"here",
|
|
"there",
|
|
"when",
|
|
"where",
|
|
"why",
|
|
"any",
|
|
"both",
|
|
"each",
|
|
"few",
|
|
"other",
|
|
"some",
|
|
"such",
|
|
"nor",
|
|
"only",
|
|
"same",
|
|
"so",
|
|
"than",
|
|
"too",
|
|
"very",
|
|
"s",
|
|
"t",
|
|
"don",
|
|
"should",
|
|
]);
|
|
|
|
/**
 * Search the published documents of a team, optionally restricted to the
 * documents reachable through a share.
 *
 * @param team - the team whose documents are searched.
 * @param options - search options; `limit` defaults to 15 and `offset` to 0.
 * @returns the ranked results together with the total number of matches.
 * @throws ValidationError when the database reports a tsquery syntax error.
 */
async searchForTeam(
  team: Team,
  options: SearchOptions = {}
): Promise<SearchResponse> {
  const { limit = 15, offset = 0, query, share } = options;

  // Team searches always include published documents in the status filter.
  const where = await PostgresSearchProvider.buildWhere(team, {
    ...options,
    statusFilter: [...(options.statusFilter || []), StatusFilter.Published],
  });

  if (share) {
    // Start from an empty scope so that a share which matches none of the
    // branches below can never push an `undefined` id filter into the query.
    let documentIds: string[] = [];

    if (share.collectionId) {
      const sharedCollection =
        share.collection ??
        (await share.$get("collection", { scope: "unscoped" }));
      invariant(sharedCollection, "Cannot find collection for share");
      documentIds = sharedCollection.getAllDocumentIds();
    } else if (share.documentId && share.includeChildDocuments) {
      const sharedDocument = await share.$get("document");
      invariant(sharedDocument, "Cannot find document for share");

      const childDocumentIds = await sharedDocument.findAllChildDocumentIds({
        archivedAt: {
          [Op.is]: null,
        },
      });

      documentIds = [sharedDocument.id, ...childDocumentIds];
    } else if (share.documentId) {
      // A single-document share exposes that document only.
      documentIds = [share.documentId];
    }

    where[Op.and].push({
      id: documentIds,
    });
  }

  const findOptions = PostgresSearchProvider.buildFindOptions({
    query,
    sort: options.sort,
    direction: options.direction,
    usePopularityBoost: options.usePopularityBoost,
  });

  try {
    // The ranked id lookup and the total count are independent, so they are
    // issued together.
    const resultsQuery = Document.unscoped().findAll({
      ...findOptions,
      where,
      limit,
      offset,
    }) as unknown as Promise<RankedDocument[]>;

    const countQuery = Document.unscoped().count({
      // @ts-expect-error Types are incorrect for count
      replacements: findOptions.replacements,
      where,
    }) as unknown as Promise<number>;
    const [results, count] = await Promise.all([resultsQuery, countQuery]);

    // Final query to get associated document data
    const documents = await Document.findAll({
      where: {
        id: map(results, "id"),
        teamId: team.id,
      },
      include: [
        {
          model: Collection,
          as: "collection",
        },
      ],
    });

    return PostgresSearchProvider.buildResponse({
      query,
      results,
      documents,
      count,
    });
  } catch (err) {
    // `err` may be any thrown value; only inspect `message` on real Errors.
    if (
      err instanceof Error &&
      err.message.includes("syntax error in tsquery")
    ) {
      throw ValidationError("Invalid search query");
    }
    throw err;
  }
}
|
|
|
|
/**
 * Find documents visible to a user whose title contains the query text.
 *
 * @param user - the user performing the search.
 * @param options - search options; `limit` defaults to 15 and `offset` to 0.
 * @returns the matching documents, ordered by the requested sort (updatedAt
 * descending when none is given).
 */
async searchTitlesForUser(
  user: User,
  options: SearchOptions = {}
): Promise<Document[]> {
  // The query is matched with ILIKE below, so it is deliberately withheld
  // from buildWhere.
  const { limit = 15, offset = 0, query, ...filters } = options;
  const where = await PostgresSearchProvider.buildWhere(user, filters);

  if (query) {
    where[Op.and].push({ title: { [Op.iLike]: `%${query}%` } });
  }

  const sortField = options.sort ?? SortFilter.UpdatedAt;
  const sortDirection = options.direction ?? DirectionFilter.DESC;

  // Joins referenced by the `$memberships.id$` / `$groupMemberships.id$`
  // conditions that buildWhere adds for users, plus the author records.
  const include = [
    {
      association: "memberships",
      where: { userId: user.id },
      required: false,
      separate: false,
    },
    {
      association: "groupMemberships",
      required: false,
      separate: false,
      include: [
        {
          association: "group",
          required: true,
          include: [
            {
              association: "groupUsers",
              required: true,
              where: { userId: user.id },
            },
          ],
        },
      ],
    },
    { model: User, as: "createdBy", paranoid: false },
    { model: User, as: "updatedBy", paranoid: false },
  ];

  const scopedDocument = Document.withMembershipScope(user.id, {
    includeDrafts: true,
  });

  return scopedDocument.findAll({
    where,
    subQuery: false,
    order: [[sortField, sortDirection]],
    include,
    offset,
    limit,
  });
}
|
|
|
|
/**
 * Find collections the user can access whose name matches the query,
 * compared case- and accent-insensitively.
 *
 * @param user - the user performing the search.
 * @param options - search options; `limit` defaults to 15 and `offset` to 0.
 * @returns the matching collections ordered by name ascending.
 */
async searchCollectionsForUser(
  user: User,
  options: SearchOptions = {}
): Promise<Collection[]> {
  const { limit = 15, offset = 0, query } = options;
  const collectionIds = await user.collectionIds();

  // Without a query no name condition is applied at all.
  const nameCondition = query
    ? {
        [Op.or]: [
          Sequelize.literal(
            `unaccent(LOWER(name)) like unaccent(LOWER(:query))`
          ),
        ],
      }
    : {};

  return Collection.findAll({
    where: {
      [Op.and]: nameCondition,
      id: collectionIds,
      teamId: user.teamId,
    },
    order: [["name", "ASC"]],
    // Bound to the `:query` placeholder in the literal above.
    replacements: { query: `%${query}%` },
    limit,
    offset,
  });
}
|
|
|
|
/**
 * Full-text search across the documents a user has access to.
 *
 * @param user - the user performing the search.
 * @param options - search options; `limit` defaults to 15 and `offset` to 0.
 * @returns the ranked results together with the total number of matches.
 * @throws ValidationError when the database reports a tsquery syntax error.
 */
async searchForUser(
  user: User,
  options: SearchOptions = {}
): Promise<SearchResponse> {
  const { limit = 15, offset = 0, query } = options;

  const where = await PostgresSearchProvider.buildWhere(user, options);

  const findOptions = PostgresSearchProvider.buildFindOptions({
    query,
    sort: options.sort,
    direction: options.direction,
  });

  // Joins referenced by the `$memberships.id$` / `$groupMemberships.id$`
  // conditions that buildWhere adds for users.
  const include = [
    {
      association: "memberships",
      where: {
        userId: user.id,
      },
      required: false,
      separate: false,
    },
    {
      association: "groupMemberships",
      required: false,
      separate: false,
      include: [
        {
          association: "group",
          required: true,
          include: [
            {
              association: "groupUsers",
              required: true,
              where: {
                userId: user.id,
              },
            },
          ],
        },
      ],
    },
  ];

  try {
    const results = (await Document.unscoped().findAll({
      ...findOptions,
      subQuery: false,
      include,
      where,
      limit,
      offset,
    })) as unknown as RankedDocument[];

    // A first page that is not full already contains every match, so its
    // length is the total. The count query is only issued when it is really
    // needed; creating it unconditionally would run it regardless and leave
    // an un-awaited promise behind whenever the shortcut is taken.
    const totalIsKnown = results.length < limit && offset === 0;

    // Final query to get associated document data
    const [documents, count] = await Promise.all([
      Document.withMembershipScope(user.id, { includeDrafts: true }).findAll({
        where: {
          teamId: user.teamId,
          id: map(results, "id"),
        },
      }),
      totalIsKnown
        ? results.length
        : (Document.unscoped().count({
            // @ts-expect-error Types are incorrect for count
            subQuery: false,
            include,
            replacements: findOptions.replacements,
            where,
          }) as unknown as Promise<number>),
    ]);

    return PostgresSearchProvider.buildResponse({
      query,
      results,
      documents,
      count,
    });
  } catch (err) {
    // `err` may be any thrown value; only inspect `message` on real Errors.
    if (
      err instanceof Error &&
      err.message.includes("syntax error in tsquery")
    ) {
      throw ValidationError("Invalid search query");
    }
    throw err;
  }
}
|
|
|
|
/**
|
|
* No-op for PostgreSQL — indexing is handled by database triggers.
|
|
*
|
|
* @param _model - unused.
|
|
* @param _item - unused.
|
|
*/
|
|
async index(
  _model: SearchableModel,
  _item: Document | Collection | Comment
): Promise<void> {
  // Intentionally empty: tsvector triggers in PostgreSQL keep the index
  // current.
}
|
|
|
|
/**
|
|
* No-op for PostgreSQL — removal is handled by database cascades.
|
|
*
|
|
* @param _model - unused.
|
|
* @param _id - unused.
|
|
* @param _teamId - unused.
|
|
*/
|
|
async remove(
  _model: SearchableModel,
  _id: string,
  _teamId: string
): Promise<void> {
  // Intentionally empty: cascading deletes in PostgreSQL take care of
  // removal.
}
|
|
|
|
/**
|
|
* No-op for PostgreSQL — metadata is stored in the same tables.
|
|
*
|
|
* @param _model - unused.
|
|
* @param _id - unused.
|
|
* @param _metadata - unused.
|
|
*/
|
|
async updateMetadata(
  _model: SearchableModel,
  _id: string,
  _metadata: Record<string, unknown>
): Promise<void> {
  // Intentionally empty: the metadata is stored in the same row as the
  // document.
}
|
|
|
|
/**
 * Assemble the attributes, replacements and ordering shared by the ranked
 * document queries.
 *
 * @param query - the raw user query; when present a `searchRanking`
 * attribute is selected and the `:query` replacement is populated.
 * @param sort - explicit sort field, if any.
 * @param direction - explicit sort direction, DESC when omitted.
 * @param usePopularityBoost - whether the rank is scaled by the document's
 * popularity score (defaults to true).
 * @returns find options containing `attributes`, `replacements` and `order`.
 */
private static buildFindOptions({
  query,
  sort,
  direction,
  usePopularityBoost = true,
}: {
  query?: string;
  sort?: SortFilter;
  direction?: DirectionFilter;
  usePopularityBoost?: boolean;
}): FindOptions {
  const attributes: FindAttributeOptions = ["id"];
  const replacements: BindOrReplacements = {};
  const order: Order = [];

  if (query) {
    const textRank = `ts_rank("searchVector", to_tsquery('english', :query))`;
    const rankExpression = usePopularityBoost
      ? `${textRank} * (1 + 0.25 * LN(1 + COALESCE("popularityScore", 0)))`
      : textRank;

    replacements["query"] = PostgresSearchProvider.webSearchQuery(query);
    attributes.push([Sequelize.literal(rankExpression), "searchRanking"]);
  }

  // A query without an explicit sort is ordered by relevance first, with the
  // most recently updated documents breaking ties.
  if (query && !sort) {
    order.push(["searchRanking", "DESC"]);
    order.push([SortFilter.UpdatedAt, DirectionFilter.DESC]);
    return { attributes, replacements, order };
  }

  // Otherwise the requested (or default) sort leads and relevance, when
  // available, is only the tiebreaker.
  const sortDirection = direction ?? DirectionFilter.DESC;
  const sortField = sort ?? SortFilter.UpdatedAt;

  if (sortField === SortFilter.Title) {
    // Titles are compared through LOWER() so ordering ignores case.
    order.push([Sequelize.fn("LOWER", Sequelize.col("title")), sortDirection]);
  } else {
    order.push([sortField, sortDirection]);
  }

  if (query) {
    order.push(["searchRanking", "DESC"]);
  }

  return { attributes, replacements, order };
}
|
|
|
|
/**
 * Produce a short excerpt of the document's plain text around the first
 * match, with every matched term wrapped in `<b>` tags.
 *
 * @param document - the document to excerpt.
 * @param query - the raw user query.
 * @returns the highlighted excerpt.
 */
private static buildResultContext(document: Document, query: string) {
  // Reset regex lastIndex to avoid state issues with global regex
  PostgresSearchProvider.QUOTED_QUERY_REGEX.lastIndex = 0;
  const quotedQueries = Array.from(
    query.matchAll(PostgresSearchProvider.QUOTED_QUERY_REGEX)
  );
  const text = DocumentHelper.toPlainText(document);

  // Regex to highlight quoted queries as ts_headline will not do this by default due to stemming.
  const fullMatchRegex = new RegExp(escapeRegExp(query), "i");

  // Empty terms must never reach the alternation: an empty alternative (from
  // `""`) or `\b\b` (from repeated spaces or a query made only of stop words)
  // matches the empty string, which would scatter `<b></b>` pairs across the
  // whole text.
  const termPatterns = quotedQueries.length
    ? quotedQueries
        .map((match) => match[1])
        .filter(Boolean)
        .map((phrase) => escapeRegExp(phrase))
    : PostgresSearchProvider.removeStopWords(query)
        .split(/\s+/)
        .filter(Boolean)
        .map((term) => `\\b${escapeRegExp(term)}\\b`);

  const highlightRegex = new RegExp(
    [fullMatchRegex.source, ...termPatterns].join("|"),
    "gi"
  );

  // Reset regex lastIndex to avoid state issues with global regex
  PostgresSearchProvider.BREAK_CHARS_REGEX.lastIndex = 0;
  const breakCharsRegex = PostgresSearchProvider.BREAK_CHARS_REGEX;

  // chop text around the first match, prefer the first full match if possible.
  const fullMatchIndex = text.search(fullMatchRegex);
  const offsetStartIndex =
    (fullMatchIndex >= 0 ? fullMatchIndex : text.search(highlightRegex)) - 65;

  // Start at a break character at or after the offset (as located by
  // regexIndexOf), clamped to the beginning of the text.
  const startIndex = Math.max(
    0,
    offsetStartIndex <= 0
      ? 0
      : regexIndexOf(text, breakCharsRegex, offsetStartIndex)
  );
  const context = text.replace(highlightRegex, "<b>$&</b>");

  // The end is searched in the highlighted text, 250 characters past the
  // start, using regexLastIndexOf.
  const endIndex = regexLastIndexOf(
    context,
    breakCharsRegex,
    startIndex + 250
  );

  return context.slice(startIndex, endIndex);
}
|
|
|
|
/**
 * Build the Sequelize `where` clause shared by the document search methods.
 *
 * The clause is scoped to the model's team and to non-deleted documents,
 * then narrowed by the access rules and by each filter present in `options`.
 *
 * @param model - the user or team the search is performed for.
 * @param options - the search options to translate into conditions.
 * @returns the where clause, exposing its `Op.and` / `Op.or` arrays so
 * callers can append further conditions.
 * @throws ValidationError when `options.dateFilter` is not a plain
 * alphabetic interval unit.
 */
private static async buildWhere(model: User | Team, options: SearchOptions) {
  const teamId = model instanceof Team ? model.id : model.teamId;
  const where: WhereOptions<Document> & {
    [Op.or]: WhereOptions<Document>[];
    [Op.and]: WhereOptions<Document>[];
  } = {
    teamId,
    [Op.or]: [],
    [Op.and]: [
      {
        deletedAt: {
          [Op.eq]: null,
        },
      },
    ],
  };

  if (model instanceof User) {
    // These reference joined associations, so the caller's query must
    // include "memberships" and "groupMemberships".
    where[Op.or].push(
      { "$memberships.id$": { [Op.ne]: null } },
      { "$groupMemberships.id$": { [Op.ne]: null } }
    );

    // Allow users to see their own drafts that have no collection, where no
    // membership or collection access applies. Drafts in collections remain
    // gated by the collection/membership checks above.
    if (options.statusFilter?.includes(StatusFilter.Draft)) {
      where[Op.or].push({
        createdById: model.id,
        collectionId: { [Op.is]: null },
        publishedAt: { [Op.eq]: null },
        archivedAt: { [Op.eq]: null },
      });
    }
  }

  // Ensure we're filtering by the users accessible collections. If
  // collectionId is passed as an option it is assumed that the authorization
  // has already been done in the router
  const collectionIds = options.collectionId
    ? [options.collectionId]
    : await model.collectionIds();

  if (options.collectionId) {
    where[Op.and].push({ collectionId: options.collectionId });
  }
  if (collectionIds.length) {
    where[Op.or].push({ collectionId: collectionIds });
  }

  if (options.dateFilter) {
    // The unit is interpolated into a raw SQL literal below, so refuse
    // anything that is not a bare alphabetic interval unit instead of
    // trusting upstream validation alone.
    if (!/^[a-z]+$/i.test(options.dateFilter)) {
      throw ValidationError("Invalid date filter");
    }

    where[Op.and].push({
      updatedAt: {
        [Op.gt]: sequelize.literal(
          `now() - interval '1 ${options.dateFilter}'`
        ),
      },
    });
  }

  if (options.collaboratorIds) {
    where[Op.and].push({
      collaboratorIds: {
        [Op.contains]: options.collaboratorIds,
      },
    });
  }

  if (options.documentIds) {
    where[Op.and].push({
      id: options.documentIds,
    });
  }

  // Each requested status contributes one alternative; they are OR-ed
  // together further down.
  const statusQuery = [];
  if (options.statusFilter?.includes(StatusFilter.Published)) {
    statusQuery.push({
      [Op.and]: [
        {
          publishedAt: {
            [Op.ne]: null,
          },
          archivedAt: {
            [Op.eq]: null,
          },
        },
      ],
    });
  }

  if (
    options.statusFilter?.includes(StatusFilter.Draft) &&
    // Only ever include draft results for the user's own documents
    model instanceof User
  ) {
    statusQuery.push({
      [Op.and]: [
        {
          publishedAt: {
            [Op.eq]: null,
          },
          archivedAt: {
            [Op.eq]: null,
          },
          [Op.or]: [
            { createdById: model.id },
            { "$memberships.id$": { [Op.ne]: null } },
          ],
        },
      ],
    });
  }

  if (options.statusFilter?.includes(StatusFilter.Archived)) {
    statusQuery.push({
      archivedAt: {
        [Op.ne]: null,
      },
    });
  }

  if (statusQuery.length) {
    where[Op.and].push({
      [Op.or]: statusQuery,
    });
  }

  if (options.query) {
    // find words that look like urls, these should be treated separately as the postgres full-text
    // index will generally not match them.
    let likelyUrls = getUrls(options.query);

    // remove likely urls, and escape the rest of the query.
    let limitedQuery = PostgresSearchProvider.escapeQuery(
      likelyUrls
        .reduce((q, url) => q.replace(url, ""), options.query)
        .slice(0, PostgresSearchProvider.maxQueryLength)
        .trim()
    );

    // Escape the URLs
    likelyUrls = likelyUrls.map((url) =>
      PostgresSearchProvider.escapeQuery(url)
    );

    // Extract quoted queries and add them to the where clause, up to a maximum of 3 total.
    const quotedQueries = Array.from(limitedQuery.matchAll(/"([^"]*)"/g)).map(
      (match) => match[1]
    );

    // remove quoted queries from the limited query
    limitedQuery = limitedQuery.replace(/"([^"]*)"/g, "");

    const iLikeQueries = [...quotedQueries, ...likelyUrls].slice(0, 3);

    // Every quoted phrase / URL must appear in the title or the text.
    for (const match of iLikeQueries) {
      where[Op.and].push({
        [Op.or]: [
          {
            title: {
              [Op.iLike]: `%${match}%`,
            },
          },
          {
            text: {
              [Op.iLike]: `%${match}%`,
            },
          },
        ],
      });
    }

    // Apply the full-text condition when free text remains, or when there
    // were no phrase/URL conditions at all. `:query` is a named replacement
    // the caller must supply with the query.
    if (limitedQuery || iLikeQueries.length === 0) {
      where[Op.and].push(
        Sequelize.fn(
          `"searchVector" @@ to_tsquery`,
          "english",
          Sequelize.literal(":query")
        )
      );
    }
  }

  return where;
}
|
|
|
|
/**
 * Combine the ranked id rows with the fully loaded documents into the
 * search response, preserving the ranking order.
 *
 * @param query - the raw user query; when present a highlighted context is
 * built for each result.
 * @param results - ranked rows carrying `id` and `searchRanking`.
 * @param documents - the loaded documents for those ids, in any order.
 * @param count - the total number of matches, passed through as `total`.
 * @returns the search response.
 */
private static buildResponse({
  query,
  results,
  documents,
  count,
}: {
  query?: string;
  results: RankedDocument[];
  documents: Document[];
  count: number;
}): SearchResponse {
  // Index once by id rather than scanning `documents` for every result.
  const documentsById = new Map<string, Document>();
  for (const candidate of documents) {
    if (!documentsById.has(candidate.id)) {
      documentsById.set(candidate.id, candidate);
    }
  }

  return {
    results: results.flatMap((result) => {
      const document = documentsById.get(result.id);

      // The documents come from a separate query, so a ranked row may have
      // no loaded counterpart. Skip it rather than hand an undefined
      // document to the context builder and the caller.
      if (!document) {
        return [];
      }

      return [
        {
          ranking: result.dataValues.searchRanking,
          context: query
            ? PostgresSearchProvider.buildResultContext(document, query)
            : undefined,
          document,
        },
      ];
    }),
    total: count,
  };
}
|
|
|
|
/**
|
|
* Convert a user search query into a format that can be used by Postgres.
|
|
*
|
|
* @param query - the user search query.
|
|
* @returns the query formatted for Postgres ts_query.
|
|
*/
|
|
public static webSearchQuery(query: string): string {
  // Cap the length first: the regex work below runs against untrusted input.
  const escaped = PostgresSearchProvider.escapeQuery(
    query.slice(0, PostgresSearchProvider.maxQueryLength)
  );

  // A query wrapped in double quotes is passed on without the trailing "*".
  const quotedSearch = escaped.startsWith('"') && escaped.endsWith('"');

  // The shared regex is global, so rewind it before every use.
  PostgresSearchProvider.SINGLE_QUOTE_REGEX.lastIndex = 0;

  // Swap the first character of each interior run of single quotes for "&".
  // Every swap is one character for one character, so the offsets reported
  // for the unmodified string remain valid.
  const units = escaped.split("");
  const lastPosition = units.length - 1;

  for (const { index } of escaped.matchAll(
    PostgresSearchProvider.SINGLE_QUOTE_REGEX
  )) {
    if (index !== undefined && index > 0 && index < lastPosition) {
      units[index] = "&";
    }
  }

  // queryParser trims as well, but for certain inputs it strips characters
  // other than spaces, so trim here first.
  // Ref: https://github.com/caub/pg-tsquery/issues/27
  const trimmed = units.join("").trim();
  const parsed = queryParser()(quotedSearch ? trimmed : `${trimmed}*`);

  // Drop a trailing join character, then a trailing escape character.
  const withoutTrailingJoin = parsed.replace(/&$/, "");
  return withoutTrailingJoin.replace(/\\$/, "");
}
|
|
|
|
/**
 * Escape the characters in a query that need a backslash prefix.
 *
 * @param query - the raw query text.
 * @returns the query with every "\" and ":" preceded by a backslash.
 */
private static escapeQuery(query: string): string {
  // One pass covers both cases:
  // - "\" becomes "\\" because sequelize.escape doesn't do it,
  //   see: https://github.com/sequelize/sequelize/issues/2950
  // - ":" becomes "\:" because it's a reserved character in tsquery,
  //   see: https://github.com/outline/outline/issues/6542
  return query.replace(/[\\:]/g, "\\$&");
}
|
|
|
|
/**
 * Remove English stop words from a space-separated query.
 *
 * @param query - the raw query text.
 * @returns the query without stop words, re-joined with single spaces
 * between the remaining pieces.
 */
private static removeStopWords(query: string): string {
  return (
    query
      .split(" ")
      // STOP_WORDS holds lowercase entries only, so compare in lowercase;
      // otherwise "The" or "AND" would slip through.
      .filter(
        (word) => !PostgresSearchProvider.STOP_WORDS.has(word.toLowerCase())
      )
      .join(" ")
  );
}
|
|
}
|