2023-01-12 21:40:33 -07:00
|
|
|
import { URL } from "node:url";
|
|
|
|
import { JSDOM } from "jsdom";
|
|
|
|
import fetch from "node-fetch";
|
|
|
|
import tinycolor from "tinycolor2";
|
|
|
|
import { getJson, getHtml, getAgentByUrl } from "@/misc/fetch.js";
|
|
|
|
import type { Instance } from "@/models/entities/instance.js";
|
|
|
|
import { Instances } from "@/models/index.js";
|
|
|
|
import { getFetchInstanceMetadataLock } from "@/misc/app-lock.js";
|
|
|
|
import Logger from "./logger.js";
|
|
|
|
import type { DOMWindow } from "jsdom";
|
|
|
|
|
|
|
|
// Logger shared by the functions in this file (constructed with "metadata"
// and "cyan" — presumably the log domain and its display color; confirm
// against the Logger class).
const logger = new Logger("metadata", "cyan");
|
|
|
|
|
|
|
|
/**
 * Refreshes the stored metadata of a remote instance: software name/version,
 * registration state, maintainer, display name, description, icon URLs and
 * theme color.
 *
 * A lock keyed by `instance.host` is held for the whole operation and is
 * released on every exit path. Unless `force` is set, nothing is fetched when
 * the stored `infoUpdatedAt` is less than 24 hours old.
 *
 * Errors raised while fetching or saving the metadata are logged and
 * swallowed. An error from the freshness lookup still propagates to the
 * caller, but only after the lock has been released.
 *
 * @param instance - Instance whose `host` is queried and whose row (by `id`) is updated.
 * @param force - When true, skip the freshness check and always refetch.
 */
export async function fetchInstanceMetadata(
	instance: Instance,
	force = false,
): Promise<void> {
	const maxAgeMs = 1000 * 60 * 60 * 24;
	const unlock = await getFetchInstanceMetadataLock(instance.host);

	// Everything after acquiring the lock lives inside try/finally so that a
	// failing database lookup cannot leave the lock held.
	try {
		if (!force) {
			const _instance = await Instances.findOneBy({ host: instance.host });
			const now = Date.now();
			if (
				_instance?.infoUpdatedAt &&
				now - _instance.infoUpdatedAt.getTime() < maxAgeMs
			) {
				return;
			}
		}

		logger.info(`Fetching metadata of ${instance.host} ...`);

		try {
			// Each source is optional: a failed fetch degrades to null.
			const [info, dom, manifest] = await Promise.all([
				fetchNodeinfo(instance).catch(() => null),
				fetchDom(instance).catch(() => null),
				fetchManifest(instance).catch(() => null),
			]);

			const [favicon, icon, themeColor, name, description] = await Promise.all([
				fetchFaviconUrl(instance, dom).catch(() => null),
				fetchIconUrl(instance, dom, manifest).catch(() => null),
				getThemeColor(info, dom, manifest).catch(() => null),
				getSiteName(info, dom, manifest).catch(() => null),
				getDescription(info, dom, manifest).catch(() => null),
			]);

			logger.succ(`Successfuly fetched metadata of ${instance.host}`);

			const updates: Record<string, any> = {
				infoUpdatedAt: new Date(),
			};

			if (info) {
				// nodeinfo is remote input: only lower-case the name when it
				// really is a string, so a malformed document cannot abort the
				// whole update.
				const softwareName = info.software?.name;
				updates.softwareName =
					typeof softwareName === "string"
						? softwareName.toLowerCase()
						: undefined;
				updates.softwareVersion = info.software?.version;
				updates.openRegistrations = info.openRegistrations;
				updates.maintainerName = info.metadata?.maintainer?.name || null;
				updates.maintainerEmail = info.metadata?.maintainer?.email || null;
			}

			if (name) updates.name = name;
			if (description) updates.description = description;
			if (icon || favicon) updates.iconUrl = icon || favicon;
			if (favicon) updates.faviconUrl = favicon;
			if (themeColor) updates.themeColor = themeColor;

			await Instances.update(instance.id, updates);

			logger.succ(`Successfuly updated metadata of ${instance.host}`);
		} catch (e) {
			logger.error(`Failed to update metadata of ${instance.host}: ${e}`);
		}
	} finally {
		unlock();
	}
}
|
|
|
|
|
2020-10-27 01:16:59 -06:00
|
|
|
/**
 * The parts of a remote nodeinfo document that this file reads.
 *
 * The document comes from another server and is not validated, so every
 * field is optional and loosely typed; consumers must check values before
 * relying on them.
 */
type NodeInfo = {
	openRegistrations?: any;
	software?: {
		name?: any;
		version?: any;
	};
	metadata?: {
		name?: any;
		nodeName?: any;
		nodeDescription?: any;
		description?: any;
		maintainer?: {
			name?: any;
			email?: any;
		};
		// Read by getThemeColor as the first theme color candidate.
		themeColor?: any;
	};
};
|
|
|
|
|
|
|
|
/**
 * Fetches the nodeinfo document of an instance.
 *
 * Reads `https://<host>/.well-known/nodeinfo`, picks the advertised link with
 * the highest supported schema version (2.1, then 2.0, then 1.0) and fetches
 * the document it points to.
 *
 * @param instance - Instance whose `host` is queried.
 * @returns The fetched document, cast to `NodeInfo` without validation.
 * @throws Error when the discovery document is missing or has no usable
 *   link, or when either request fails. The failure is logged before it is
 *   rethrown.
 */
async function fetchNodeinfo(instance: Instance): Promise<NodeInfo> {
	logger.info(`Fetching nodeinfo of ${instance.host} ...`);

	try {
		const wellknown = (await getJson(
			`https://${instance.host}/.well-known/nodeinfo`,
		).catch((e) => {
			if (e.statusCode === 404) {
				throw new Error("No nodeinfo provided");
			} else {
				throw new Error(e.statusCode || e.message);
			}
		})) as Record<string, unknown> | null;

		const links = wellknown?.links;
		if (!Array.isArray(links)) {
			throw new Error("No wellknown links");
		}

		// Highest schema version first. Entries that are not objects or that
		// lack a string href are skipped instead of crashing the lookup.
		const schemaPrefix = "http://nodeinfo.diaspora.software/ns/schema/";
		let href: string | undefined;
		for (const version of ["2.1", "2.0", "1.0"]) {
			const match = links.find(
				(link) =>
					link != null &&
					link.rel === `${schemaPrefix}${version}` &&
					typeof link.href === "string",
			);
			if (match) {
				href = match.href;
				break;
			}
		}

		if (href == null) {
			throw new Error("No nodeinfo link provided");
		}

		const info = await getJson(href).catch((e) => {
			throw new Error(e.statusCode || e.message);
		});

		logger.succ(`Successfuly fetched nodeinfo of ${instance.host}`);

		return info as NodeInfo;
	} catch (e) {
		// The catch variable may be anything; only read `.message` from Errors.
		const message = e instanceof Error ? e.message : String(e);
		logger.error(`Failed to fetch nodeinfo of ${instance.host}: ${message}`);

		throw e;
	}
}
|
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Downloads the front page of an instance and parses it with JSDOM.
 *
 * @param instance - Instance whose `host` is requested over HTTPS.
 * @returns The `document` of the parsed page.
 */
async function fetchDom(instance: Instance): Promise<DOMWindow["document"]> {
	logger.info(`Fetching HTML of ${instance.host} ...`);

	const markup = await getHtml(`https://${instance.host}`);

	return new JSDOM(markup).window.document;
}
|
2020-07-25 20:04:07 -06:00
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Fetches `https://<host>/manifest.json` and returns the parsed JSON.
 *
 * @param instance - Instance whose `host` is requested.
 * @returns The parsed manifest, cast to a plain record without validation.
 */
async function fetchManifest(
	instance: Instance,
): Promise<Record<string, unknown> | null> {
	const manifestLocation = `https://${instance.host}/manifest.json`;

	return (await getJson(manifestLocation)) as Record<string, unknown>;
}
|
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Determines the favicon URL of an instance.
 *
 * When a document is available, the last `<link>` whose rel list contains
 * "icon" is used, resolved against the instance origin. Otherwise, or when
 * no such link has an href, `/favicon.ico` is requested and its URL is
 * returned if the response is OK.
 *
 * @param instance - Instance whose `host` forms the base URL.
 * @param doc - Parsed front page, or null when it could not be fetched.
 * @returns An absolute favicon URL, or null when none was found.
 */
async function fetchFaviconUrl(
	instance: Instance,
	doc: DOMWindow["document"] | null,
): Promise<string | null> {
	const origin = `https://${instance.host}`;

	if (doc) {
		// Reverse scan so the last matching <link> in the document wins.
		// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
		const linkElements = Array.from(doc.getElementsByTagName("link")).reverse();
		const iconLink = linkElements.find((el) => el.relList.contains("icon"));
		const iconHref = iconLink?.href;

		if (iconHref) {
			return new URL(iconHref, origin).href;
		}
	}

	const fallbackUrl = `${origin}/favicon.ico`;

	const response = await fetch(fallbackUrl, {
		// TODO
		//timeout: 10000,
		agent: getAgentByUrl,
	});

	return response.ok ? fallbackUrl : null;
}
|
2020-10-27 01:16:59 -06:00
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Determines the icon URL of an instance.
 *
 * The first manifest icon with a `src` takes precedence. Otherwise the
 * document's `<link>` elements are searched, preferring
 * "apple-touch-icon-precomposed", then "apple-touch-icon", then "icon"; for
 * each rel the last matching element in the document wins. The result is
 * resolved against the instance origin.
 *
 * @param instance - Instance whose `host` forms the base URL.
 * @param doc - Parsed front page, or null when it could not be fetched.
 * @param manifest - Parsed web manifest, or null when it could not be fetched.
 * @returns An absolute icon URL, or null when none was found.
 */
async function fetchIconUrl(
	instance: Instance,
	doc: DOMWindow["document"] | null,
	manifest: Record<string, any> | null,
): Promise<string | null> {
	const origin = `https://${instance.host}`;

	const manifestIcons = manifest?.icons;
	if (manifestIcons && manifestIcons.length > 0 && manifestIcons[0].src) {
		return new URL(manifestIcons[0].src, origin).href;
	}

	if (!doc) {
		return null;
	}

	// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
	const linkElements = Array.from(doc.getElementsByTagName("link")).reverse();

	// https://github.com/misskey-dev/misskey/pull/8220/files/0ec4eba22a914e31b86874f12448f88b3e58dd5a#r796487559
	const relPriority = [
		"apple-touch-icon-precomposed",
		"apple-touch-icon",
		"icon",
	];
	const chosenHref = relPriority
		.map((rel) => linkElements.find((el) => el.relList.contains(rel))?.href)
		.find((candidate) => candidate);

	return chosenHref ? new URL(chosenHref, origin).href : null;
}
|
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Picks the theme color of an instance and normalizes it to a hex string.
 *
 * Candidates are tried in order: nodeinfo `metadata.themeColor`, the page's
 * `<meta name="theme-color">` content, then the manifest's `theme_color`.
 *
 * @param info - Fetched nodeinfo, or null.
 * @param doc - Parsed front page, or null.
 * @param manifest - Parsed web manifest, or null.
 * @returns The color as produced by tinycolor's `toHexString()`, or null when
 *   no candidate exists or tinycolor reports it as invalid.
 */
async function getThemeColor(
	info: NodeInfo | null,
	doc: DOMWindow["document"] | null,
	manifest: Record<string, any> | null,
): Promise<string | null> {
	const candidate =
		info?.metadata?.themeColor ||
		doc?.querySelector('meta[name="theme-color"]')?.getAttribute("content") ||
		manifest?.theme_color;

	if (!candidate) {
		return null;
	}

	const parsed = new tinycolor(candidate);
	return parsed.isValid() ? parsed.toHexString() : null;
}
|
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Picks the display name of an instance.
 *
 * Sources are tried in order: nodeinfo `metadata.nodeName` / `metadata.name`,
 * the page's `og:title` meta tag, then the manifest's `name` / `short_name`.
 *
 * @param info - Fetched nodeinfo, or null.
 * @param doc - Parsed front page, or null.
 * @param manifest - Parsed web manifest, or null.
 * @returns The first non-empty name found, or null (never undefined) when no
 *   source provides one.
 */
async function getSiteName(
	info: NodeInfo | null,
	doc: DOMWindow["document"] | null,
	manifest: Record<string, any> | null,
): Promise<string | null> {
	const fromNodeinfo = info?.metadata?.nodeName || info?.metadata?.name;
	if (fromNodeinfo) {
		return fromNodeinfo;
	}

	const ogTitle = doc
		?.querySelector('meta[property="og:title"]')
		?.getAttribute("content");
	if (ogTitle) {
		return ogTitle;
	}

	// Fall back to null explicitly so a manifest without a name does not leak
	// `undefined` through the declared return type.
	return manifest?.name || manifest?.short_name || null;
}
|
|
|
|
|
2023-01-12 21:40:33 -07:00
|
|
|
/**
 * Picks the description of an instance.
 *
 * Sources are tried in order: nodeinfo `metadata.nodeDescription` /
 * `metadata.description`, the page's `<meta name="description">`, the page's
 * `og:description` meta tag, then the manifest's `description` member.
 *
 * @param info - Fetched nodeinfo, or null.
 * @param doc - Parsed front page, or null.
 * @param manifest - Parsed web manifest, or null.
 * @returns The first non-empty description found, or null when no source
 *   provides one.
 */
async function getDescription(
	info: NodeInfo | null,
	doc: DOMWindow["document"] | null,
	manifest: Record<string, any> | null,
): Promise<string | null> {
	const fromNodeinfo =
		info?.metadata?.nodeDescription || info?.metadata?.description;
	if (fromNodeinfo) {
		return fromNodeinfo;
	}

	if (doc) {
		const metaDescription = doc
			.querySelector('meta[name="description"]')
			?.getAttribute("content");
		if (metaDescription) {
			return metaDescription;
		}

		const ogDescription = doc
			.querySelector('meta[property="og:description"]')
			?.getAttribute("content");
		if (ogDescription) {
			return ogDescription;
		}
	}

	// Use the manifest's own description. The site name is deliberately not a
	// fallback here: it would be stored as the description and duplicate the
	// name.
	return manifest?.description || null;
}
|