chore: Short-circuit common scanner/crawler routes (#12306)

* Short-circuit common scanner/crawler routes

* PR feedback, remove query strings
This commit is contained in:
Tom Moor
2026-05-09 11:32:17 -04:00
committed by GitHub
parent fba1bcef87
commit 7ff1c84530
4 changed files with 98 additions and 1 deletions
+33
View File
@@ -163,3 +163,36 @@ describe("/s/:id", () => {
expect(body).not.toContain("[Child Document]");
});
});
describe("scanner path 404s", () => {
  // Representative probe URLs; each one is expected to be answered with a
  // bare 404 rather than the rendered application HTML.
  const probePaths = [
    "/.well-known/gpc.json",
    "/.env",
    "/.git/config",
    "/cgi-bin/test.cgi",
    "/wp-admin/setup-config.php",
    "/wp-login.php",
    "/xmlrpc.php",
    "/admin.php",
    "/phpmyadmin/index.php",
    "/actuator/health",
    "/HNAP1/",
  ];

  it.each(probePaths)(
    "returns 404 for %s without rendering the app shell",
    async (path) => {
      const response = await server.get(path);
      const html = await response.text();

      expect(response.status).toEqual(404);
      // The absence of a <title> tag is used as the signal that no HTML
      // shell was rendered.
      expect(html).not.toContain("<title>");
    }
  );

  it("still serves the app shell for legitimate unknown paths", async () => {
    // An unknown path that does not look like a probe must still get a 200.
    const response = await server.get("/some-app-route");

    expect(response.status).toEqual(200);
  });

  it("still serves the OAuth well-known endpoint", async () => {
    // Guards against the "/.well-known" probe rule shadowing this endpoint.
    const response = await server.get(
      "/.well-known/oauth-authorization-server"
    );
    expect(response.status).toEqual(200);

    const metadata = await response.json();
    expect(metadata.issuer).toBeDefined();
  });
});
+6
View File
@@ -15,6 +15,7 @@ import { Integration } from "@server/models";
import { opensearchResponse } from "@server/utils/opensearch";
import { getTeamFromContext } from "@server/utils/passport";
import { robotsResponse } from "@server/utils/robots";
import { isInvalidAppPath } from "@server/utils/url";
import apexRedirect from "../middlewares/apexRedirect";
import { renderApp, renderShare } from "./app";
import { renderEmbed } from "./embeds";
@@ -217,6 +218,11 @@ router.get("/sitemap.xml", async (ctx) => {
// catch all for application
router.get("*", async (ctx, next) => {
if (isInvalidAppPath(ctx.path)) {
ctx.status = 404;
return;
}
if (ctx.state?.rootShare) {
// Only allow root path for root share domains, return 404 for other paths.
// Valid paths like /doc/:documentSlug and /sitemap.xml are handled above.
+34 -1
View File
@@ -1,7 +1,7 @@
import dns from "node:dns";
import type { MockInstance } from "vitest";
import env from "@server/env";
import { validateUrlNotPrivate } from "./url";
import { isInvalidAppPath, validateUrlNotPrivate } from "./url";
describe("validateUrlNotPrivate", () => {
let lookupSpy: MockInstance;
@@ -98,3 +98,36 @@ describe("validateUrlNotPrivate", () => {
});
});
});
describe("isInvalidAppPath", () => {
  // Paths that must be classified as scanner probes.
  const scannerPaths = [
    "/.well-known/gpc.json",
    "/.env",
    "/.env.production",
    "/.git/config",
    "/.DS_Store",
    "/cgi-bin/test.cgi",
    "/wp-admin/setup-config.php",
    "/wp-login.php",
    "/wp-content/plugins/foo",
    "/xmlrpc.php",
    "/admin.php",
    "/phpmyadmin/index.php",
    "/actuator/health",
    "/HNAP1/",
    "/index.php",
  ];

  // Ordinary application and API paths that must not be flagged.
  const legitimatePaths = [
    "/",
    "/home",
    "/doc/document-slug",
    "/collection/abc123",
    "/settings/account",
    "/api/documents.list",
  ];

  it.each(scannerPaths)("returns true for scanner path %s", (path) => {
    const flagged = isInvalidAppPath(path);
    expect(flagged).toBe(true);
  });

  it.each(legitimatePaths)("returns false for legitimate path %s", (path) => {
    const flagged = isInvalidAppPath(path);
    expect(flagged).toBe(false);
  });
});
+25
View File
@@ -18,6 +18,31 @@ const privateRanges = new Set([
export const generateUrlId = () => randomString(UrlIdLength);
// Case-insensitive pattern for request paths typically probed by
// vulnerability scanners. It is the union of three independent rules and is
// compiled once when the module loads.
const scannerPathPattern = ((): RegExp => {
  // Rule 1: the first path segment is exactly one of these well-known probe
  // targets (the name must be followed by "/" or the end of the path).
  const probedFirstSegments =
    "^\\/(?:cgi-bin|wp-admin|wp-content|wp-includes|wp-json|wp-login\\.php|wordpress|xmlrpc\\.php|phpmyadmin|pma|myadmin|owa|autodiscover|actuator|vendor|webdav|cms|drupal|joomla|magento|laravel|adminer|console|server-status|server-info|HNAP1|boaform|hudson|jenkins)(?:\\/|$)";

  // Rule 2: one of these extensions appears anywhere in the path, directly
  // followed by the end of the path, a "/" or a "?".
  const probedExtensions =
    "\\.(?:php|asp|aspx|jsp|cgi|env|sql|bak|swp|htaccess|htpasswd)(?:$|[/?])";

  // Rule 3: the path starts with one of these dot-prefixed names. This is a
  // prefix match, so variants such as "/.env.production" are covered too.
  const probedDotfiles = "^\\/\\.(?:well-known|env|git|svn|aws|ssh|DS_Store)";

  return new RegExp(
    `${probedFirstSegments}|${probedExtensions}|${probedDotfiles}`,
    "i"
  );
})();
/**
 * Reports whether a request path resembles a probe from an automated scanner
 * instead of a genuine application route. Callers can use the result to
 * answer with a 404 up front rather than rendering the SPA shell.
 *
 * @param path - the request path to inspect.
 * @returns true when the path matches a known scanner pattern, false otherwise.
 */
export function isInvalidAppPath(path: string): boolean {
  // The pattern has no global/sticky flag, so matching is stateless.
  const match = scannerPathPattern.exec(path);
  return match !== null;
}
/**
* Checks if an IP address is private, loopback, or link-local.
*