jormungandr-bite/packages/backend/src/misc/get-file-info.ts

444 lines
10 KiB
TypeScript
Raw Normal View History

2023-01-12 21:40:33 -07:00
import * as fs from "node:fs";
import * as crypto from "node:crypto";
import { join } from "node:path";
import * as stream from "node:stream";
import * as util from "node:util";
import { FSWatcher } from "chokidar";
import { fileTypeFromFile } from "file-type";
2023-05-19 20:26:13 -06:00
import probeImageSize from "probe-image-size";
2023-01-12 21:40:33 -07:00
import FFmpeg from "fluent-ffmpeg";
import isSvg from "is-svg";
import { type predictionType } from "nsfwjs";
import sharp from "sharp";
import { encode } from "blurhash";
import { detectSensitive } from "@/services/detect-sensitive.js";
import { createTempDir } from "./create-temp.js";
const pipeline = util.promisify(stream.pipeline);
export type FileInfo = {
size: number;
md5: string;
type: {
mime: string;
ext: string | null;
};
width?: number;
height?: number;
orientation?: number;
blurhash?: string;
sensitive: boolean;
porn: boolean;
warnings: string[];
};
const TYPE_OCTET_STREAM = {
2023-01-12 21:40:33 -07:00
mime: "application/octet-stream",
2021-12-09 07:58:30 -07:00
ext: null,
};
const TYPE_SVG = {
2023-01-12 21:40:33 -07:00
mime: "image/svg+xml",
ext: "svg",
};
/**
* Get file information
*/
2023-01-12 21:40:33 -07:00
export async function getFileInfo(
path: string,
opts: {
skipSensitiveDetection: boolean;
sensitiveThreshold?: number;
sensitiveThresholdForPorn?: number;
enableSensitiveMediaDetectionForVideos?: boolean;
},
): Promise<FileInfo> {
const warnings = [] as string[];
const size = await getFileSize(path);
const md5 = await calcHash(path);
let type = await detectType(path);
// image dimensions
let width: number | undefined;
let height: number | undefined;
let orientation: number | undefined;
2023-01-12 21:40:33 -07:00
if (
[
"image/jpeg",
"image/gif",
"image/png",
"image/apng",
"image/webp",
"image/bmp",
"image/tiff",
"image/svg+xml",
"image/vnd.adobe.photoshop",
"image/avif",
].includes(type.mime)
) {
const imageSize = await detectImageSize(path).catch((e) => {
warnings.push(`detectImageSize failed: ${e}`);
return undefined;
});
// うまく判定できない画像は octet-stream にする
if (!imageSize) {
2023-01-12 21:40:33 -07:00
warnings.push("cannot detect image dimensions");
type = TYPE_OCTET_STREAM;
2023-01-12 21:40:33 -07:00
} else if (imageSize.wUnits === "px") {
width = imageSize.width;
height = imageSize.height;
orientation = imageSize.orientation;
// 制限を超えている画像は octet-stream にする
if (imageSize.width > 16383 || imageSize.height > 16383) {
2023-01-12 21:40:33 -07:00
warnings.push("image dimensions exceeds limits");
type = TYPE_OCTET_STREAM;
}
} else {
warnings.push(`unsupported unit type: ${imageSize.wUnits}`);
}
}
let blurhash: string | undefined;
2023-01-12 21:40:33 -07:00
if (
[
"image/jpeg",
"image/gif",
"image/png",
"image/apng",
"image/webp",
"image/svg+xml",
"image/avif",
].includes(type.mime)
) {
blurhash = await getBlurhash(path).catch((e) => {
warnings.push(`getBlurhash failed: ${e}`);
return undefined;
});
}
let sensitive = false;
let porn = false;
if (!opts.skipSensitiveDetection) {
await detectSensitivity(
path,
type.mime,
opts.sensitiveThreshold ?? 0.5,
opts.sensitiveThresholdForPorn ?? 0.75,
opts.enableSensitiveMediaDetectionForVideos ?? false,
2023-01-12 21:40:33 -07:00
).then(
(value) => {
[sensitive, porn] = value;
},
(error) => {
warnings.push(`detectSensitivity failed: ${error}`);
},
);
}
return {
size,
md5,
type,
width,
height,
orientation,
blurhash,
sensitive,
porn,
warnings,
};
}
2023-01-12 21:40:33 -07:00
async function detectSensitivity(
source: string,
mime: string,
sensitiveThreshold: number,
sensitiveThresholdForPorn: number,
analyzeVideo: boolean,
): Promise<[sensitive: boolean, porn: boolean]> {
let sensitive = false;
let porn = false;
2023-01-12 21:40:33 -07:00
function judgePrediction(
result: readonly predictionType[],
): [sensitive: boolean, porn: boolean] {
let sensitive = false;
let porn = false;
2023-01-12 21:40:33 -07:00
if (
(result.find((x) => x.className === "Sexy")?.probability ?? 0) >
sensitiveThreshold
)
sensitive = true;
if (
(result.find((x) => x.className === "Hentai")?.probability ?? 0) >
sensitiveThreshold
)
sensitive = true;
if (
(result.find((x) => x.className === "Porn")?.probability ?? 0) >
sensitiveThreshold
)
sensitive = true;
if (
(result.find((x) => x.className === "Porn")?.probability ?? 0) >
sensitiveThresholdForPorn
)
porn = true;
return [sensitive, porn];
}
2023-01-12 21:40:33 -07:00
if (["image/jpeg", "image/png", "image/webp"].includes(mime)) {
const result = await detectSensitive(source);
if (result) {
[sensitive, porn] = judgePrediction(result);
}
2023-01-12 21:40:33 -07:00
} else if (
analyzeVideo &&
(mime === "image/apng" || mime.startsWith("video/"))
) {
const [outDir, disposeOutDir] = await createTempDir();
try {
const command = FFmpeg()
.input(source)
.inputOptions([
2023-01-12 21:40:33 -07:00
"-skip_frame",
"nokey", // 可能ならキーフレームのみを取得してほしいとする(そうなるとは限らない)
"-lowres",
"3", // 元の画質でデコードする必要はないので 1/8 画質でデコードしてもよいとする(そうなるとは限らない)
])
.noAudio()
.videoFilters([
{
2023-01-12 21:40:33 -07:00
filter: "select", // フレームのフィルタリング
options: {
2023-01-12 21:40:33 -07:00
e: "eq(pict_type,PICT_TYPE_I)", // I-Frame のみをフィルタするVP9 とかはデコードしてみないとわからないっぽい)
},
},
{
2023-01-12 21:40:33 -07:00
filter: "blackframe", // 暗いフレームの検出
options: {
2023-01-12 21:40:33 -07:00
amount: "0", // 暗さに関わらず全てのフレームで測定値を取る
},
},
{
2023-01-12 21:40:33 -07:00
filter: "metadata",
options: {
2023-01-12 21:40:33 -07:00
mode: "select", // フレーム選択モード
key: "lavfi.blackframe.pblack", // フレームにおける暗部の百分率(前のフィルタからのメタデータを参照する)
value: "50",
function: "less", // 50% 未満のフレームを選択する50% 以上暗部があるフレームだと誤検知を招くかもしれないので)
},
},
{
2023-01-12 21:40:33 -07:00
filter: "scale",
options: {
w: 299,
h: 299,
},
},
])
2023-01-12 21:40:33 -07:00
.format("image2")
.output(join(outDir, "%d.png"))
.outputOptions(["-vsync", "0"]); // 可変フレームレートにすることで穴埋めをさせない
const results: ReturnType<typeof judgePrediction>[] = [];
let frameIndex = 0;
let targetIndex = 0;
let nextIndex = 1;
for await (const path of asyncIterateFrames(outDir, command)) {
try {
const index = frameIndex++;
if (index !== targetIndex) {
continue;
}
targetIndex = nextIndex;
nextIndex += index; // fibonacci sequence によってフレーム数制限を掛ける
const result = await detectSensitive(path);
if (result) {
results.push(judgePrediction(result));
}
} finally {
fs.promises.unlink(path);
}
}
2023-01-12 21:40:33 -07:00
sensitive =
results.filter((x) => x[0]).length >=
Math.ceil(results.length * sensitiveThreshold);
porn =
results.filter((x) => x[1]).length >=
Math.ceil(results.length * sensitiveThresholdForPorn);
} finally {
disposeOutDir();
}
}
return [sensitive, porn];
}
2023-01-12 21:40:33 -07:00
async function* asyncIterateFrames(
cwd: string,
command: FFmpeg.FfmpegCommand,
): AsyncGenerator<string, void> {
const watcher = new FSWatcher({
cwd,
disableGlobbing: true,
});
let finished = false;
2023-01-12 21:40:33 -07:00
command.once("end", () => {
finished = true;
watcher.close();
});
command.run();
2023-01-12 21:40:33 -07:00
for (let i = 1; true; i++) {
const current = `${i}.png`;
const next = `${i + 1}.png`;
const framePath = join(cwd, current);
if (await exists(join(cwd, next))) {
yield framePath;
2023-01-12 21:40:33 -07:00
} else if (!finished) {
watcher.add(next);
await new Promise<void>((resolve, reject) => {
2023-01-12 21:40:33 -07:00
watcher.on("add", function onAdd(path) {
if (path === next) {
// 次フレームの書き出しが始まっているなら、現在フレームの書き出しは終わっている
watcher.unwatch(current);
2023-01-12 21:40:33 -07:00
watcher.off("add", onAdd);
resolve();
}
});
2023-01-12 21:40:33 -07:00
command.once("end", resolve); // 全てのフレームを処理し終わったなら、最終フレームである現在フレームの書き出しは終わっている
command.once("error", reject);
});
yield framePath;
} else if (await exists(framePath)) {
yield framePath;
} else {
return;
}
}
}
function exists(path: string): Promise<boolean> {
2023-01-12 21:40:33 -07:00
return fs.promises.access(path).then(
() => true,
() => false,
);
}
/**
* Detect MIME Type and extension
*/
2021-12-25 09:42:06 -07:00
export async function detectType(path: string): Promise<{
mime: string;
ext: string | null;
}> {
// Check 0 byte
const fileSize = await getFileSize(path);
if (fileSize === 0) {
return TYPE_OCTET_STREAM;
}
2022-03-07 07:51:34 -07:00
const type = await fileTypeFromFile(path);
if (type) {
// XMLはSVGかもしれない
2023-01-12 21:40:33 -07:00
if (type.mime === "application/xml" && (await checkSvg(path))) {
return TYPE_SVG;
}
return {
mime: type.mime,
2021-12-09 07:58:30 -07:00
ext: type.ext,
};
}
// 種類が不明でもSVGかもしれない
if (await checkSvg(path)) {
return TYPE_SVG;
}
// それでも種類が不明なら application/octet-stream にする
return TYPE_OCTET_STREAM;
}
/**
* Check the file is SVG or not
*/
export async function checkSvg(path: string) {
try {
const size = await getFileSize(path);
if (size > 1 * 1024 * 1024) return false;
return isSvg(fs.readFileSync(path));
} catch {
return false;
}
}
/**
* Get file size
*/
export async function getFileSize(path: string): Promise<number> {
const getStat = util.promisify(fs.stat);
return (await getStat(path)).size;
}
/**
* Calculate MD5 hash
*/
async function calcHash(path: string): Promise<string> {
2023-01-12 21:40:33 -07:00
const hash = crypto.createHash("md5").setEncoding("hex");
await pipeline(fs.createReadStream(path), hash);
return hash.read();
}
/**
* Detect dimensions of image
*/
async function detectImageSize(path: string): Promise<{
width: number;
height: number;
wUnits: string;
hUnits: string;
orientation?: number;
}> {
const readable = fs.createReadStream(path);
const imageSize = await probeImageSize(readable);
readable.destroy();
return imageSize;
}
/**
* Calculate average color of image
*/
function getBlurhash(path: string): Promise<string> {
return new Promise((resolve, reject) => {
sharp(path)
.raw()
.ensureAlpha()
2023-01-12 21:40:33 -07:00
.resize(64, 64, { fit: "inside" })
.toBuffer((err, buffer, { width, height }) => {
if (err) return reject(err);
Migrate to Vue3 (#6587) * Update reaction.vue * fix bug * wip * wip * wjio * wip * Revert "wip" This reverts commit e427f2160adf4e8a4147006e25a89854edab0033. * wip * wip * wip * Update init.ts * Update drive-window.vue * wip * wip * Use PascalCase for components * Use PascalCase for components * update dep * wip * wip * wip * Update init.ts * wip * Update paging.ts * Update test.vue * watch deep * wip * lint * wip * wip * wip * wip * wiop * wip * Update webpack.config.ts * alllow null poll * wip * wip * wip * wiop * UI redesign & refactor (#6714) * wip * wip * wip * wip * wip * Update drive.vue * Update word-mute.vue * wip * wip * wip * clean up * wip * Update default.vue * wip * Update notes.vue * Update mfm.ts * Update index.home.vue * Update post-form.vue * Update post-form-attaches.vue * wip * Update post-form.vue * Update sidebar.vue * wip * wip * Update index.vue * wip * Update default.vue * Update index.vue * Update index.vue * wip * Update post-form-attaches.vue * Update note.vue * wip * clean up * Update notes.vue * wip * wip * Update ja-JP.yml * wip * wip * Update index.vue * wip * wip * wip * wip * wip * wip * wip * wip * Update default.vue * wip * Update _dark.json5 * wip * wip * wip * clean up * wip * wip * Update index.vue * Update test.vue * wip * wip * fix * wip * wip * wip * wip * clena yop * wip * wip * Update store.ts * Update messaging-room.vue * Update default.widgets.vue * fix * wip * wip * Update modal.vue * wip * Update os.ts * Update os.ts * Update deck.vue * Update init.ts * wip * Update ja-JP.yml * v-sizeは単にwindowのresizeを監視するだけで良いかもしれない * Update modal.vue * wip * Update tooltip.ts * wip * wip * wip * wip * wip * Update image-viewer.vue * wip * wip * Update style.scss * Update style.scss * Update visitor.vue * wip * Update init.ts * Update init.ts * wip * wip * Update visitor.vue * Update visitor.vue * Update visitor.vue * Update visitor.vue * wip * wip * Update modal.vue * Update header.vue * Update menu.vue * Update about.vue * Update about-misskey.vue * wip * wip * Update visitor.vue * Update tooltip.ts * wip * Update drive.vue * wip * Update style.scss * Update header.vue * wip * wip * Update users.user.vue * Update announcements.vue * wip * wip * wip * Update emojis.vue * wip * Update emojis.vue * Update style.scss * Update users.vue * wip * Update style.scss * wip * Update welcome.entrance.vue * Update radio.vue * Update size.ts * Update emoji-edit-dialog.vue * wip * Update emojis.vue * wip * Update emojis.vue * Update emojis.vue * Update emojis.vue * wip * wip * wip * wip * Update file-dialog.vue * wip * wip * Update token-generate-window.vue * Update notification-setting-window.vue * wip * wip * Update _error_.vue * Update ja-JP.yml * wip * wip * Update store.ts * Update emojis.vue * Update emojis.vue * Update emojis.vue * Update announcements.vue * Update store.ts * wip * Update page-editor.vue * wip * wip * Update modal.vue * wip * Update select-file.ts * Update timeline.vue * Update emojis.vue * Update os.ts * wip * Update user-select.vue * Update mfm.ts * Update get-file-info.ts * Update drive.vue * Update init.ts * Update mfm.ts * wip * wip * Update window.vue * Update note.vue * wip * wip * Update user-info.vue * wip * wip * wip * wip * wip * Update header.vue * Update header.vue * wip * Update explore.vue * wip * wip * wip * Update webpack.config.ts * wip * wip * wip * wip * wip * wip * Update autocomplete.ts * wip * wip * wip * Update toast.vue * wip * Update post-form-dialog.vue * wip * wip * wip * wip * wip * Update users.vue * wip * Update explore.vue * wip * wip * wip * Update package.json * wip * Update icon-dialog.vue * wip * wip * Update user-preview.ts * wip * wip * wip * wip * wip * Update instance.vue * Update user-name.vue * Update federation.vue * Update instance.vue * wip * wip * Update tag.vue * wip * wip * wip * wip * wip * Update instance.vue * wip * Update os.ts * Update os.ts * wip * wip * wip * Update router.ts * wip * Update init.ts * Update note.vue * Update messages.vue * wip * wip * wip * wip * wip * google * wip * wip * wip * wip * Update theme-editor.vue * wip * wip * Update room.vue * Update channel-editor.vue * wip * Update window.vue * Update window.vue * wip * Update window.vue * Update window.vue * wip * Update menu.vue * wip * wip * wip * wip * Update messaging-room.vue * wip * Update post-form.vue * Update default.widgets.vue * Update window.vue * wip
2020-10-17 05:12:00 -06:00
let hash;
try {
hash = encode(new Uint8ClampedArray(buffer), width, height, 7, 7);
} catch (e) {
return reject(e);
}
resolve(hash);
});
});
}