mirror of
https://github.com/outline/outline.git
synced 2026-06-13 03:14:59 +03:00
432 lines
14 KiB
TypeScript
432 lines
14 KiB
TypeScript
import path from "node:path";
|
|
import fs from "fs-extra";
|
|
import tmp from "tmp";
|
|
import type { Entry } from "yauzl";
|
|
import yauzl, { validateFileName } from "yauzl";
|
|
import type { ZipFile } from "yazl";
|
|
import { bytesToHumanReadable } from "@shared/utils/files";
|
|
import { ValidationError } from "@server/errors";
|
|
import Logger from "@server/logging/Logger";
|
|
import { trace } from "@server/logging/tracing";
|
|
import { deserializeFilename, trimFilenameAndExt } from "./fs";
|
|
|
|
/**
 * Length limit handed to `trimFilenameAndExt` for the base name of every file
 * written during extraction.
 */
const MAX_FILE_NAME_LENGTH = 255;
|
|
|
|
export interface ZipEntryHandle {
  /**
   * Name of the entry, decoded as UTF-8, exactly as it is stored in the
   * archive. A trailing `/` marks a directory entry.
   */
  fileName: string;

  /** Number of bytes the entry occupies once decompressed. */
  uncompressedSize: number;

  /** Whether the entry is a directory marker instead of a regular file. */
  isDirectory: boolean;

  /**
   * Load the contents of the entry into memory. Calling this is optional:
   * an entry that is never read is simply passed over.
   *
   * @param maxSize Largest uncompressed size, in bytes, allowed in memory.
   * @returns A promise for the entry's contents.
   */
  readBuffer(maxSize: number): Promise<Buffer>;
}
|
|
|
|
export interface ZipTreeNode {
  /** Last path segment of the entry, extension included. */
  name: string;

  /** Display title: the name with its extension removed, then deserialized. */
  title: string;

  /** Location inside the zip: `/`-joined segments without a leading slash. */
  pathInZip: string;

  /** Child nodes; filled in when the node represents a directory. */
  children: ZipTreeNode[];
}
|
|
|
|
@trace()
|
|
export default class ZipHelper {
|
|
/**
 * Persist a zip archive to a temporary file on disk.
 *
 * Entries must already have been added to the `ZipFile` by the caller. This
 * method finalizes the archive with `end()` and pipes its output stream into
 * a temporary file named `export-*.zip`. If either stream errors, the write
 * stream is destroyed, the temporary file is removed (a removal failure is
 * only logged) and the promise rejects with the stream error.
 *
 * @param zip yazl ZipFile object with entries already added.
 * @returns pathname of the temporary file the zip was written to.
 */
public static async toTmpFile(zip: ZipFile): Promise<string> {
  Logger.debug("utils", "Creating tmp file…");

  return new Promise<string>((resolve, reject) => {
    const tmpOptions = { prefix: "export-", postfix: ".zip" };

    tmp.file(tmpOptions, (tmpErr, tmpPath) => {
      if (tmpErr) {
        reject(tmpErr);
        return;
      }

      const output = fs.createWriteStream(tmpPath);

      // Tear down the partially written file before surfacing the failure.
      const abort = (cause: Error) => {
        output.destroy();
        fs.remove(tmpPath)
          .catch((removeErr) => {
            Logger.error("Failed to remove tmp file", removeErr);
          })
          .finally(() => {
            reject(cause);
          });
      };

      output.on("finish", () => {
        Logger.debug("utils", "Writing zip complete", { path: tmpPath });
        resolve(tmpPath);
      });
      output.on("error", abort);

      zip.outputStream.on("error", abort);
      zip.outputStream.pipe(output);
      zip.end();
    });
  });
}
|
|
|
|
/**
 * Visit every entry of a zip file in archive order without extracting the
 * archive to disk.
 *
 * Entries are handled strictly one after another: the next entry is only
 * requested once the promise returned by `onEntry` (if any) has resolved.
 * Entries whose names fail `validateFileName` are logged and skipped. A
 * rejection or throw from `onEntry` closes the archive and rejects the
 * returned promise.
 *
 * @param filePath The file path where the zip is located.
 * @param onEntry Handler invoked for each entry. To skip an entry, return
 * without calling `entry.readBuffer(maxSize)`.
 * @returns Promise that resolves once the archive has been closed after a
 * complete walk.
 */
public static walk(
  filePath: string,
  onEntry: (entry: ZipEntryHandle) => Promise<void> | void
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const openOptions = {
      lazyEntries: true,
      autoClose: true,
      decodeStrings: false,
    };

    yauzl.open(filePath, openOptions, (openErr, archive) => {
      if (openErr) {
        reject(openErr);
        return;
      }

      // Flips to true the first time the walk either completes or fails.
      let finished = false;

      const abort = (cause: Error) => {
        if (finished) {
          return;
        }
        finished = true;
        archive.close();
        reject(cause);
      };

      // Buffers one entry in memory, enforcing `maxSize` both up front (from
      // the recorded size) and while streaming (from the bytes actually seen).
      const readEntryBuffer = (entry: Entry, name: string, maxSize: number) =>
        new Promise<Buffer>((resolveRead, rejectRead) => {
          if (entry.uncompressedSize > maxSize) {
            rejectRead(ZipHelper.entryTooLargeError(name, maxSize));
            return;
          }

          archive.openReadStream(entry, (streamErr, stream) => {
            if (streamErr) {
              rejectRead(streamErr);
              return;
            }

            const parts: Buffer[] = [];
            let total = 0;
            let done = false;

            // Runs `action` only for the first of end / error / close.
            const settleOnce = (action: () => void) => {
              if (done) {
                return;
              }
              done = true;
              action();
            };

            stream.on("data", (chunk: Buffer) => {
              total += chunk.length;
              if (total > maxSize) {
                stream.destroy(ZipHelper.entryTooLargeError(name, maxSize));
                return;
              }
              parts.push(chunk);
            });
            stream.on("end", () => {
              settleOnce(() => resolveRead(Buffer.concat(parts)));
            });
            stream.on("error", (readErr) => {
              settleOnce(() => rejectRead(readErr));
            });
            stream.on("close", () => {
              settleOnce(() =>
                rejectRead(
                  new Error(`Stream closed before completing read of ${name}`)
                )
              );
            });
          });
        });

      archive.on("entry", (entry: Entry) => {
        // With `decodeStrings: false` the name is decoded here, not by yauzl.
        const name = Buffer.from(entry.fileName).toString("utf8");

        if (validateFileName(name)) {
          Logger.warn("Invalid zip entry", { fileName: name });
          archive.readEntry();
          return;
        }

        const handle: ZipEntryHandle = {
          fileName: name,
          uncompressedSize: entry.uncompressedSize,
          isDirectory: /\/$/.test(name),
          readBuffer: (maxSize) => readEntryBuffer(entry, name, maxSize),
        };

        Promise.resolve()
          .then(() => onEntry(handle))
          .then(() => {
            if (!finished) {
              archive.readEntry();
            }
          })
          .catch(abort);
      });

      archive.on("close", () => {
        if (finished) {
          return;
        }
        finished = true;
        resolve();
      });
      archive.on("error", abort);

      archive.readEntry();
    });
  });
}
|
|
|
|
/**
 * Build a tree describing the entries of a zip file in a single pass, without
 * extracting anything to disk. Any entry with a path segment that is
 * `__MACOSX` or starts with a dot is left out of the tree.
 *
 * When `onFile` is supplied it is awaited once for every file entry, as the
 * entry is encountered, and receives the tree node together with a handle to
 * the raw entry. A caller that wants the contents can call
 * `entry.readBuffer(maxSize)`; one that only needs the structure can leave
 * the callback out.
 *
 * @param filePath Local filesystem path to the zip.
 * @param onFile Optional per-file hook; not called for directory entries.
 * @returns A synthetic root node whose `children` are the zip's top-level
 * entries.
 */
public static async toFileTree(
  filePath: string,
  onFile?: (node: ZipTreeNode, entry: ZipEntryHandle) => Promise<void> | void
): Promise<ZipTreeNode> {
  const root: ZipTreeNode = { name: "", title: "", pathInZip: "", children: [] };

  // Every node created so far, keyed by its path within the zip.
  const index = new Map<string, ZipTreeNode>();

  const shouldSkip = (part: string) =>
    part.startsWith(".") || part === "__MACOSX";

  // Returns the node for `entryName`, creating it and any missing ancestors.
  // Returns null for an empty path or one containing a filtered segment.
  const locate = (entryName: string): ZipTreeNode | null => {
    // Empty segments and the no-op `.` (as in `./Collection/page.md`) are
    // dropped. `..` is kept on purpose so that the dot-prefix filter rejects
    // it: path traversal inside zip entries is never resolved.
    const parts = entryName
      .split("/")
      .filter((part) => part !== "" && part !== ".");
    if (parts.length === 0) {
      return null;
    }

    let parent = root;
    let prefix = "";
    for (const part of parts) {
      if (shouldSkip(part)) {
        return null;
      }

      prefix = prefix ? `${prefix}/${part}` : part;

      let node = index.get(prefix);
      if (!node) {
        node = {
          name: part,
          title: deserializeFilename(path.parse(part).name),
          pathInZip: prefix,
          children: [],
        };
        parent.children.push(node);
        index.set(prefix, node);
      }
      parent = node;
    }

    return parent;
  };

  await this.walk(filePath, async (entry) => {
    // The node is resolved for directories too, so that they appear in the tree.
    const node = locate(entry.fileName);
    if (node === null || entry.isDirectory || !onFile) {
      return;
    }
    await onFile(node, entry);
  });

  return root;
}
|
|
|
|
/**
 * Extract a zip file into a directory on disk.
 *
 * Entries are processed one at a time in archive order; the next entry is
 * only requested once the current one has been fully written. Entries whose
 * names fail `validateFileName`, or whose target would resolve outside of
 * `outputDir`, are logged and skipped. The base name of each extracted file
 * is passed through `trimFilenameAndExt` with `MAX_FILE_NAME_LENGTH`.
 *
 * @param filePath The file path where the zip is located.
 * @param outputDir The directory where the zip should be extracted.
 * @returns Promise that resolves once the archive has been closed, which
 * happens only after every extracted file has finished writing. Rejects with
 * the first error encountered; files written before the error are left in
 * place.
 */
public static extract(filePath: string, outputDir: string): Promise<void> {
  return new Promise((resolve, reject) => {
    Logger.debug("utils", "Opening zip file", { filePath });

    yauzl.open(
      filePath,
      {
        lazyEntries: true,
        autoClose: true,
        // Filenames are validated inside on("entry") handler instead of within yauzl as some
        // otherwise valid zip files (including those in our test suite) include / path. We can
        // safely read but skip writing these.
        // see: https://github.com/thejoshwolfe/yauzl/issues/135
        decodeStrings: false,
      },
      (err, zipfile) => {
        if (err) {
          return reject(err);
        }

        const resolvedOutput = path.resolve(outputDir);
        // Avoid a doubled separator when the output directory is a filesystem root.
        const outputPrefix = resolvedOutput.endsWith(path.sep)
          ? resolvedOutput
          : resolvedOutput + path.sep;
        const isInsideOutput = (location: string) =>
          location === resolvedOutput || location.startsWith(outputPrefix);

        // Set on the first failure so the archive is closed and rejected once.
        let failed = false;
        const fail = (error: unknown) => {
          if (failed) {
            return;
          }
          failed = true;
          zipfile.close();
          reject(error);
        };

        try {
          zipfile.on("entry", (entry: Entry) => {
            const entryPath = Buffer.from(entry.fileName).toString("utf8");
            Logger.debug("utils", "Extracting zip entry", {
              filePath: entryPath,
            });

            // Both the read and the write stream can report a failure for the
            // same entry; only the first call is allowed to advance the walk.
            let advanced = false;
            const processNext = (error?: NodeJS.ErrnoException | null) => {
              if (advanced) {
                return;
              }
              advanced = true;
              if (error) {
                fail(error);
                return;
              }
              if (!failed) {
                zipfile.readEntry();
              }
            };

            if (validateFileName(entryPath)) {
              Logger.warn("Invalid zip entry", { filePath: entryPath });
              processNext();
              return;
            }

            // directory file names end with '/'
            const isDirectory = /\/$/.test(entryPath);
            const targetDir = path.resolve(
              resolvedOutput,
              isDirectory ? entryPath : path.dirname(entryPath)
            );
            const location = isDirectory
              ? targetDir
              : path.resolve(
                  targetDir,
                  trimFilenameAndExt(
                    path.basename(entryPath),
                    MAX_FILE_NAME_LENGTH
                  )
                );

            // Check containment before touching the filesystem, for both
            // directory and file entries.
            if (!isInsideOutput(targetDir) || !isInsideOutput(location)) {
              Logger.warn("Zip entry escapes extraction directory", {
                filePath: entryPath,
                location,
              });
              processNext();
              return;
            }

            // Create the directory itself, or the parent directory of a file.
            fs.mkdirp(targetDir, (mkErr) => {
              if (mkErr || isDirectory) {
                processNext(mkErr);
                return;
              }

              // The read stream is opened only once the target directory
              // exists, so a mkdirp failure cannot leave a stream open.
              zipfile.openReadStream(entry, (rErr, readStream) => {
                if (rErr) {
                  processNext(rErr);
                  return;
                }

                const dest = fs.createWriteStream(location);
                const abort = (error: Error) => {
                  readStream.destroy();
                  dest.destroy();
                  processNext(error);
                };

                // Advance on "finish" of the write stream rather than "end"
                // of the read stream, so the archive cannot close (and the
                // promise resolve) before the data has been flushed.
                dest.on("error", abort).on("finish", () => processNext());
                readStream.on("error", abort).pipe(dest);
              });
            });
          });

          zipfile.on("close", resolve);
          zipfile.on("error", fail);

          // Start reading only after all listeners are attached.
          zipfile.readEntry();
        } catch (zErr) {
          fail(zErr);
        }
      }
    );
  });
}
|
|
|
|
/**
 * Build the error reported when a zip entry exceeds the allowed size.
 *
 * @param fileName Name of the offending entry, used in the message.
 * @param maxSize The size limit in bytes, rendered in human readable form.
 * @returns The value produced by `ValidationError` for the message.
 */
private static entryTooLargeError(fileName: string, maxSize: number): Error {
  const limit = bytesToHumanReadable(maxSize);
  return ValidationError(
    `${fileName} is too large - the maximum size is ${limit}`
  );
}
|
|
}
|