From 4a43415ff83d14fc0dbb727b9a4e5774dc41c08a Mon Sep 17 00:00:00 2001 From: syuilo Date: Sun, 26 Jul 2020 11:04:07 +0900 Subject: [PATCH] feat(server): Fetch icon url of an instance (#6591) * feat(server): Fetch icon url of an instance Resolve #6589 * chore: Rename the function --- migration/1595676934834-instance-icon-url.ts | 14 ++ src/misc/app-lock.ts | 4 +- src/misc/fetch.ts | 21 +++ src/models/entities/instance.ts | 5 + src/queue/processors/deliver.ts | 4 +- src/queue/processors/inbox.ts | 4 +- src/remote/activitypub/models/person.ts | 4 +- src/services/fetch-instance-metadata.ts | 135 +++++++++++++++++++ src/services/fetch-nodeinfo.ts | 72 ---------- 9 files changed, 183 insertions(+), 80 deletions(-) create mode 100644 migration/1595676934834-instance-icon-url.ts create mode 100644 src/services/fetch-instance-metadata.ts delete mode 100644 src/services/fetch-nodeinfo.ts diff --git a/migration/1595676934834-instance-icon-url.ts b/migration/1595676934834-instance-icon-url.ts new file mode 100644 index 000000000..c75370f17 --- /dev/null +++ b/migration/1595676934834-instance-icon-url.ts @@ -0,0 +1,14 @@ +import {MigrationInterface, QueryRunner} from "typeorm"; + +export class instanceIconUrl1595676934834 implements MigrationInterface { + name = 'instanceIconUrl1595676934834' + + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`ALTER TABLE "instance" ADD "iconUrl" character varying(256) DEFAULT null`); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`ALTER TABLE "instance" DROP COLUMN "iconUrl"`); + } + +} diff --git a/src/misc/app-lock.ts b/src/misc/app-lock.ts index ca2181f87..847299b46 100644 --- a/src/misc/app-lock.ts +++ b/src/misc/app-lock.ts @@ -21,8 +21,8 @@ export function getApLock(uri: string, timeout = 30 * 1000) { return lock(`ap-object:${uri}`, timeout); } -export function getNodeinfoLock(host: string, timeout = 30 * 1000) { - return lock(`nodeinfo:${host}`, timeout); +export function getFetchInstanceMetadataLock(host: string, timeout = 30 * 1000) { + return lock(`instance:${host}`, timeout); } export function getChartInsertLock(lockKey: string, timeout = 30 * 1000) { diff --git a/src/misc/fetch.ts b/src/misc/fetch.ts index 358bc2503..7be0e53fd 100644 --- a/src/misc/fetch.ts +++ b/src/misc/fetch.ts @@ -27,6 +27,27 @@ export async function getJson(url: string, accept = 'application/json, */*', tim return await res.json(); } +export async function getHtml(url: string, accept = 'text/html, */*', timeout = 10000, headers?: HeadersInit) { + const res = await fetch(url, { + headers: Object.assign({ + 'User-Agent': config.userAgent, + Accept: accept + }, headers || {}), + timeout, + agent: getAgentByUrl, + }); + + if (!res.ok) { + throw { + name: `StatusError`, + statusCode: res.status, + message: `${res.status} ${res.statusText}`, + }; + } + + return await res.text(); +} + /** * Get http non-proxy agent */ diff --git a/src/models/entities/instance.ts b/src/models/entities/instance.ts index fe620887d..5fedfc095 100644 --- a/src/models/entities/instance.ts +++ b/src/models/entities/instance.ts @@ -158,6 +158,11 @@ export class Instance { }) public maintainerEmail: string | null; + @Column('varchar', { + length: 256, nullable: true, default: null, + }) + public iconUrl: string | null; + @Column('timestamp with time zone', { nullable: true, }) diff --git a/src/queue/processors/deliver.ts b/src/queue/processors/deliver.ts index 16b2f6e29..cb7587ef8 100644 --- a/src/queue/processors/deliver.ts +++ b/src/queue/processors/deliver.ts @@ -4,7 +4,7 @@ import { registerOrFetchInstanceDoc } from '../../services/register-or-fetch-ins import Logger from '../../services/logger'; import { Instances } from '../../models'; import { instanceChart } from '../../services/chart'; -import { fetchNodeinfo } from '../../services/fetch-nodeinfo'; +import { fetchInstanceMetadata } from '../../services/fetch-instance-metadata'; import { fetchMeta } from '../../misc/fetch-meta'; import { toPuny } from '../../misc/convert-host'; @@ -48,7 +48,7 @@ export default async (job: Bull.Job) => { isNotResponding: false }); - fetchNodeinfo(i); + fetchInstanceMetadata(i); instanceChart.requestSent(i.host, true); }); diff --git a/src/queue/processors/inbox.ts b/src/queue/processors/inbox.ts index 1d35079e9..b4e8b85a4 100644 --- a/src/queue/processors/inbox.ts +++ b/src/queue/processors/inbox.ts @@ -8,7 +8,7 @@ import { instanceChart } from '../../services/chart'; import { fetchMeta } from '../../misc/fetch-meta'; import { toPuny, extractDbHost } from '../../misc/convert-host'; import { getApId } from '../../remote/activitypub/type'; -import { fetchNodeinfo } from '../../services/fetch-nodeinfo'; +import { fetchInstanceMetadata } from '../../services/fetch-instance-metadata'; import { InboxJobData } from '..'; import DbResolver from '../../remote/activitypub/db-resolver'; import { resolvePerson } from '../../remote/activitypub/models/person'; @@ -126,7 +126,7 @@ export default async (job: Bull.Job): Promise => { isNotResponding: false }); - fetchNodeinfo(i); + fetchInstanceMetadata(i); instanceChart.requestReceived(i.host); }); diff --git a/src/remote/activitypub/models/person.ts b/src/remote/activitypub/models/person.ts index a213abf47..5213f872e 100644 --- a/src/remote/activitypub/models/person.ts +++ b/src/remote/activitypub/models/person.ts @@ -26,7 +26,7 @@ import { validActor } from '../../../remote/activitypub/type'; import { getConnection } from 'typeorm'; import { ensure } from '../../../prelude/ensure'; import { toArray } from '../../../prelude/array'; -import { fetchNodeinfo } from '../../../services/fetch-nodeinfo'; +import { fetchInstanceMetadata } from '../../../services/fetch-instance-metadata'; const logger = apLogger; @@ -204,7 +204,7 @@ export async function createPerson(uri: string, resolver?: Resolver): Promise { Instances.increment({ id: i.id }, 'usersCount', 1); instanceChart.newUser(i.host); - fetchNodeinfo(i); + fetchInstanceMetadata(i); }); usersChart.update(user!, true); diff --git a/src/services/fetch-instance-metadata.ts b/src/services/fetch-instance-metadata.ts new file mode 100644 index 000000000..41fef859c --- /dev/null +++ b/src/services/fetch-instance-metadata.ts @@ -0,0 +1,135 @@ +import { JSDOM } from 'jsdom'; +import fetch from 'node-fetch'; +import { getJson, getHtml, getAgentByUrl } from '../misc/fetch'; +import { Instance } from '../models/entities/instance'; +import { Instances } from '../models'; +import { getFetchInstanceMetadataLock } from '../misc/app-lock'; +import Logger from './logger'; +import { URL } from 'url'; + +const logger = new Logger('metadata', 'cyan'); + +export async function fetchInstanceMetadata(instance: Instance): Promise { + const unlock = await getFetchInstanceMetadataLock(instance.host); + + const _instance = await Instances.findOne({ host: instance.host }); + const now = Date.now(); + if (_instance && _instance.infoUpdatedAt && (now - _instance.infoUpdatedAt.getTime() < 1000 * 60 * 60 * 24)) { + unlock(); + return; + } + + logger.info(`Fetching metadata of ${instance.host} ...`); + + try { + const [info, icon] = await Promise.all([ + fetchNodeinfo(instance).catch(() => null), + fetchIconUrl(instance).catch(() => null), + ]); + + logger.succ(`Successfuly fetched metadata of ${instance.host}`); + + const updates = { + infoUpdatedAt: new Date(), + } as Record; + + if (info) { + updates.softwareName = info.software.name.toLowerCase(); + updates.softwareVersion = info.software.version; + updates.openRegistrations = info.openRegistrations; + updates.name = info.metadata ? (info.metadata.nodeName || info.metadata.name || null) : null; + updates.description = info.metadata ? (info.metadata.nodeDescription || info.metadata.description || null) : null; + updates.maintainerName = info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.name || null) : null : null; + updates.maintainerEmail = info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.email || null) : null : null; + } + + if (icon) { + updates.iconUrl = icon; + } + + await Instances.update(instance.id, updates); + + logger.succ(`Successfuly updated metadata of ${instance.host}`); + } catch (e) { + logger.error(`Failed to update metadata of ${instance.host}: ${e}`); + } finally { + unlock(); + } +} + +async function fetchNodeinfo(instance: Instance): Promise> { + logger.info(`Fetching nodeinfo of ${instance.host} ...`); + + try { + const wellknown = await getJson('https://' + instance.host + '/.well-known/nodeinfo') + .catch(e => { + if (e.statusCode === 404) { + throw 'No nodeinfo provided'; + } else { + throw e.statusCode || e.message; + } + }); + + if (wellknown.links == null || !Array.isArray(wellknown.links)) { + throw 'No wellknown links'; + } + + const links = wellknown.links as any[]; + + const lnik1_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/1.0'); + const lnik2_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.0'); + const lnik2_1 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.1'); + const link = lnik2_1 || lnik2_0 || lnik1_0; + + if (link == null) { + throw 'No nodeinfo link provided'; + } + + const info = await getJson(link.href) + .catch(e => { + throw e.statusCode || e.message; + }); + + logger.succ(`Successfuly fetched nodeinfo of ${instance.host}`); + + return info; + } catch (e) { + logger.error(`Failed to fetch nodeinfo of ${instance.host}: ${e}`); + + throw e; + } +} + +async function fetchIconUrl(instance: Instance): Promise { + logger.info(`Fetching icon URL of ${instance.host} ...`); + + const url = 'https://' + instance.host; + + const html = await getHtml(url); + + const { window } = new JSDOM(html); + const doc = window.document; + + const hrefAppleTouchIconPrecomposed = doc.querySelector('link[rel="apple-touch-icon-precomposed"]')?.getAttribute('href'); + const hrefAppleTouchIcon = doc.querySelector('link[rel="apple-touch-icon"]')?.getAttribute('href'); + const hrefIcon = doc.querySelector('link[rel="icon"]')?.getAttribute('href'); + + const href = hrefAppleTouchIconPrecomposed || hrefAppleTouchIcon || hrefIcon; + + if (href) { + return (new URL(href, url)).href; + } + + const faviconUrl = url + '/favicon.ico'; + + const favicon = await fetch(faviconUrl, { + timeout: 10000, + agent: getAgentByUrl, + }); + + if (favicon.ok) { + return faviconUrl; + } + + return null; +} diff --git a/src/services/fetch-nodeinfo.ts b/src/services/fetch-nodeinfo.ts deleted file mode 100644 index 0cf51e337..000000000 --- a/src/services/fetch-nodeinfo.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { getJson } from '../misc/fetch'; -import { Instance } from '../models/entities/instance'; -import { Instances } from '../models'; -import { getNodeinfoLock } from '../misc/app-lock'; -import Logger from '../services/logger'; - -export const logger = new Logger('nodeinfo', 'cyan'); - -export async function fetchNodeinfo(instance: Instance) { - const unlock = await getNodeinfoLock(instance.host); - - const _instance = await Instances.findOne({ host: instance.host }); - const now = Date.now(); - if (_instance && _instance.infoUpdatedAt && (now - _instance.infoUpdatedAt.getTime() < 1000 * 60 * 60 * 24)) { - unlock(); - return; - } - - logger.info(`Fetching nodeinfo of ${instance.host} ...`); - - try { - const wellknown = await getJson('https://' + instance.host + '/.well-known/nodeinfo') - .catch(e => { - if (e.statusCode === 404) { - throw 'No nodeinfo provided'; - } else { - throw e.statusCode || e.message; - } - }); - - if (wellknown.links == null || !Array.isArray(wellknown.links)) { - throw 'No wellknown links'; - } - - const links = wellknown.links as any[]; - - const lnik1_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/1.0'); - const lnik2_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.0'); - const lnik2_1 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.1'); - const link = lnik2_1 || lnik2_0 || lnik1_0; - - if (link == null) { - throw 'No nodeinfo link provided'; - } - - const info = await getJson(link.href) - .catch(e => { - throw e.statusCode || e.message; - }); - - await Instances.update(instance.id, { - infoUpdatedAt: new Date(), - softwareName: info.software.name.toLowerCase(), - softwareVersion: info.software.version, - openRegistrations: info.openRegistrations, - name: info.metadata ? (info.metadata.nodeName || info.metadata.name || null) : null, - description: info.metadata ? (info.metadata.nodeDescription || info.metadata.description || null) : null, - maintainerName: info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.name || null) : null : null, - maintainerEmail: info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.email || null) : null : null, - }); - - logger.succ(`Successfuly fetched nodeinfo of ${instance.host}`); - } catch (e) { - logger.error(`Failed to fetch nodeinfo of ${instance.host}: ${e}`); - - await Instances.update(instance.id, { - infoUpdatedAt: new Date(), - }); - } finally { - unlock(); - } -}