diff --git a/config/config.exs b/config/config.exs index 1af037051..644155aeb 100644 --- a/config/config.exs +++ b/config/config.exs @@ -567,7 +567,8 @@ config :pleroma, Oban, attachments_cleanup: 1, new_users_digest: 1, mute_expire: 5, - search_indexing: 10 + search_indexing: 10, + nodeinfo_fetcher: 1 ], plugins: [ Oban.Plugins.Pruner, @@ -806,7 +807,8 @@ config :ex_aws, http_client: Pleroma.HTTP.ExAws config :web_push_encryption, http_client: Pleroma.HTTP.WebPush -config :pleroma, :instances_favicons, enabled: false +config :pleroma, :instances_favicons, enabled: true +config :pleroma, :instances_nodeinfo, enabled: true config :floki, :html_parser, Floki.HTMLParser.FastHtml diff --git a/config/description.exs b/config/description.exs index 1ff0a582b..4843c0aae 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3047,6 +3047,19 @@ config :pleroma, :config_description, [ } ] }, + %{ + group: :pleroma, + key: :instances_nodeinfo, + type: :group, + description: "Control nodeinfo for instances", + children: [ + %{ + key: :enabled, + type: :boolean, + description: "Allow/disallow getting instance nodeinfo" + } + ] + }, %{ group: :ex_aws, key: :s3, diff --git a/config/test.exs b/config/test.exs index a5edb1149..3056dbd03 100644 --- a/config/test.exs +++ b/config/test.exs @@ -139,6 +139,8 @@ config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", priv # Reduce recompilation time # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime +config :pleroma, :instances_favicons, enabled: false +config :pleroma, :instances_nodeinfo, enabled: false if File.exists?("./config/test.secret.exs") do import_config "test.secret.exs" diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index adccd7c5d..a78924dfa 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -156,7 +156,8 @@ defmodule Pleroma.Application do build_cachex("emoji_packs", expiration: 
emoji_packs_expiration(), limit: 10), build_cachex("failed_proxy_url", limit: 2500), build_cachex("banned_urls", default_ttl: :timer.hours(24 * 30), limit: 5_000), - build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500) + build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500), + build_cachex("instances", default_ttl: :timer.hours(24), limit: 2500) ] end diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex index 533dbbb82..fcf3181bf 100644 --- a/lib/pleroma/instances/instance.ex +++ b/lib/pleroma/instances/instance.ex @@ -5,6 +5,8 @@ defmodule Pleroma.Instances.Instance do @moduledoc "Instance." + @cachex Pleroma.Config.get([:cachex, :provider], Cachex) + alias Pleroma.Instances alias Pleroma.Instances.Instance alias Pleroma.Repo @@ -22,7 +24,8 @@ defmodule Pleroma.Instances.Instance do field(:host, :string) field(:unreachable_since, :naive_datetime_usec) field(:favicon, :string) - field(:favicon_updated_at, :naive_datetime) + field(:metadata_updated_at, :naive_datetime) + field(:nodeinfo, :map, default: %{}) timestamps() end @@ -31,7 +34,7 @@ defmodule Pleroma.Instances.Instance do def changeset(struct, params \\ %{}) do struct - |> cast(params, [:host, :unreachable_since, :favicon, :favicon_updated_at]) + |> cast(params, [:host, :unreachable_since, :favicon, :nodeinfo, :metadata_updated_at]) |> validate_required([:host]) |> unique_constraint(:host) end @@ -138,63 +141,144 @@ defmodule Pleroma.Instances.Instance do defp parse_datetime(datetime), do: datetime - def get_or_update_favicon(%URI{host: host} = instance_uri) do - existing_record = Repo.get_by(Instance, %{host: host}) - now = NaiveDateTime.utc_now() + def needs_update(nil), do: true - if existing_record && existing_record.favicon_updated_at && - NaiveDateTime.diff(now, existing_record.favicon_updated_at) < 86_400 do + def needs_update(%Instance{metadata_updated_at: nil}), do: true + + def 
needs_update(%Instance{metadata_updated_at: metadata_updated_at}) do + now = NaiveDateTime.utc_now() + NaiveDateTime.diff(now, metadata_updated_at) > 86_400 + end + + def local do + %Instance{ + host: Pleroma.Web.Endpoint.host(), + favicon: Pleroma.Web.Endpoint.url() <> "/favicon.png", + nodeinfo: Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo() + } + end + + def update_metadata(%URI{host: host} = uri) do + Logger.info("Checking metadata for #{host}") + existing_record = Repo.get_by(Instance, %{host: host}) + + if reachable?(host) do + do_update_metadata(uri, existing_record) + else + {:discard, :unreachable} + end + end + + defp do_update_metadata(%URI{host: host} = uri, existing_record) do + if existing_record do + if needs_update(existing_record) do + Logger.info("Updating metadata for #{host}") + favicon = scrape_favicon(uri) + nodeinfo = scrape_nodeinfo(uri) + + {:ok, instance} = + existing_record + |> changeset(%{ + host: host, + favicon: favicon, + nodeinfo: nodeinfo, + metadata_updated_at: NaiveDateTime.utc_now() + }) + |> Repo.update() + + @cachex.put(:instances_cache, "instances:#{host}", instance) + else + {:discard, "Does not require update"} + end + else + favicon = scrape_favicon(uri) + nodeinfo = scrape_nodeinfo(uri) + + Logger.info("Creating metadata for #{host}") + + {:ok, instance} = + %Instance{} + |> changeset(%{ + host: host, + favicon: favicon, + nodeinfo: nodeinfo, + metadata_updated_at: NaiveDateTime.utc_now() + }) + |> Repo.insert() + + @cachex.put(:instances_cache, "instances:#{host}", instance) + end + end + + def get_favicon(%URI{host: host}) do + existing_record = Repo.get_by(Instance, %{host: host}) + + if existing_record do existing_record.favicon else - favicon = scrape_favicon(instance_uri) - - if existing_record do - existing_record - |> changeset(%{favicon: favicon, favicon_updated_at: now}) - |> Repo.update() - else - %Instance{} - |> changeset(%{host: host, favicon: favicon, favicon_updated_at: now}) - |> Repo.insert() - 
end - - favicon - end - rescue - e -> - Logger.warn("Instance.get_or_update_favicon(\"#{host}\") error: #{inspect(e)}") nil + end + end + + defp scrape_nodeinfo(%URI{} = instance_uri) do + with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]), + {_, true} <- {:reachable, reachable?(instance_uri.host)}, + {:ok, %Tesla.Env{status: 200, body: body}} <- + Tesla.get( + "https://#{instance_uri.host}/.well-known/nodeinfo", + headers: [{"Accept", "application/json"}] + ), + {:ok, json} <- Jason.decode(body), + {:ok, %{"links" => links}} <- {:ok, json}, + {:ok, %{"href" => href}} <- + {:ok, + Enum.find(links, &(&1["rel"] == "http://nodeinfo.diaspora.software/ns/schema/2.0"))}, + {:ok, %Tesla.Env{body: data}} <- + Pleroma.HTTP.get(href, [{"accept", "application/json"}], []), + {:length, true} <- {:length, String.length(data) < 50_000}, + {:ok, nodeinfo} <- Jason.decode(data) do + nodeinfo + else + {:reachable, false} -> + Logger.debug( + "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host" + ) + + nil + + {:length, false} -> + Logger.debug( + "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body" + ) + + nil + + _ -> + nil + end end defp scrape_favicon(%URI{} = instance_uri) do - try do - with {_, true} <- {:reachable, reachable?(instance_uri.host)}, - {:ok, %Tesla.Env{body: html}} <- - Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []), - {_, [favicon_rel | _]} when is_binary(favicon_rel) <- - {:parse, - html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")}, - {_, favicon} when is_binary(favicon) <- - {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()} do - favicon - else - {:reachable, false} -> - Logger.debug( - "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host" - ) - - nil - - _ -> - nil - end - rescue - e -> - Logger.warn( - "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") error: #{inspect(e)}" + with true <- 
Pleroma.Config.get([:instances_favicons, :enabled]), + {_, true} <- {:reachable, reachable?(instance_uri.host)}, + {:ok, %Tesla.Env{body: html}} <- + Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []), + {_, [favicon_rel | _]} when is_binary(favicon_rel) <- + {:parse, html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")}, + {_, favicon} when is_binary(favicon) <- + {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()}, + {:length, true} <- {:length, String.length(favicon) < 255} do + favicon + else + {:reachable, false} -> + Logger.debug( + "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host" ) nil + + _ -> + nil end end @@ -217,4 +301,25 @@ defmodule Pleroma.Instances.Instance do end) |> Stream.run() end + + def get_by_url(url_or_host) do + url = host(url_or_host) + Repo.get_by(Instance, host: url) + end + + def get_cached_by_url(url_or_host) do + url = host(url_or_host) + + if url == Pleroma.Web.Endpoint.host() do + {:ok, local()} + else + @cachex.fetch!(:instances_cache, "instances:#{url}", fn _ -> + with %Instance{} = instance <- get_by_url(url) do + {:commit, {:ok, instance}} + else + _ -> {:ignore, nil} + end + end) + end + end end diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index c3258c75b..18643662e 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -192,6 +192,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do # - Increase the user note count # - Increase the reply count # - Increase replies count + # - Ask for scraping of nodeinfo # - Set up ActivityExpiration # - Set up notifications # - Index incoming posts for search (if needed) @@ -209,6 +210,10 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do reply_depth = (meta[:depth] || 0) + 1 + Pleroma.Workers.NodeInfoFetcherWorker.enqueue("process", %{ + "source_url" => activity.data["actor"] + }) + # 
FIXME: Force inReplyTo to replies if Pleroma.Web.Federator.allowed_thread_distance?(reply_depth) and object.data["replies"] != nil do @@ -234,7 +239,9 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do {:ok, activity, meta} else - e -> Repo.rollback(e) + e -> + Logger.error(inspect(e)) + Repo.rollback(e) end end diff --git a/lib/pleroma/web/mastodon_api/views/account_view.ex b/lib/pleroma/web/mastodon_api/views/account_view.ex index 06acf0a26..cbb57aee6 100644 --- a/lib/pleroma/web/mastodon_api/views/account_view.ex +++ b/lib/pleroma/web/mastodon_api/views/account_view.ex @@ -186,6 +186,16 @@ defmodule Pleroma.Web.MastodonAPI.AccountView do render_many(targets, AccountView, "relationship.json", render_opts) end + def render("instance.json", %{instance: %Pleroma.Instances.Instance{} = instance}) do + %{ + name: instance.host, + favicon: instance.favicon |> MediaProxy.url(), + nodeinfo: instance.nodeinfo + } + end + + def render("instance.json", _), do: nil + defp do_render("show.json", %{user: user} = opts) do user = User.sanitize_html(user, User.html_filter_policy(opts[:for])) display_name = user.name || user.nickname @@ -230,16 +240,20 @@ defmodule Pleroma.Web.MastodonAPI.AccountView do %{} end - favicon = - if Pleroma.Config.get([:instances_favicons, :enabled]) do - user - |> Map.get(:ap_id, "") - |> URI.parse() - |> URI.merge("/") - |> Pleroma.Instances.Instance.get_or_update_favicon() - |> MediaProxy.url() + instance = + with {:ok, instance} <- Pleroma.Instances.Instance.get_cached_by_url(user.ap_id) do + instance else + _ -> + nil + end + + favicon = + if is_nil(instance) do nil + else + instance.favicon + |> MediaProxy.url() end %{ @@ -271,7 +285,9 @@ defmodule Pleroma.Web.MastodonAPI.AccountView do } }, last_status_at: user.last_status_at, - + akkoma: %{ + instance: render("instance.json", %{instance: instance}) + }, # Pleroma extensions # Note: it's insecure to output :email but fully-qualified nickname may serve as safe stub fqn: User.full_nickname(user), 
diff --git a/lib/pleroma/workers/nodeinfo_fetcher_worker.ex b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex new file mode 100644 index 000000000..27492e1e3 --- /dev/null +++ b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex @@ -0,0 +1,18 @@ +defmodule Pleroma.Workers.NodeInfoFetcherWorker do + use Pleroma.Workers.WorkerHelper, queue: "nodeinfo_fetcher" + + alias Oban.Job + alias Pleroma.Instances.Instance + + @impl Oban.Worker + def perform(%Job{ + args: %{"op" => "process", "source_url" => domain} + }) do + uri = + domain + |> URI.parse() + |> URI.merge("/") + + Instance.update_metadata(uri) + end +end diff --git a/priv/repo/migrations/20221020135943_add_nodeinfo.exs b/priv/repo/migrations/20221020135943_add_nodeinfo.exs new file mode 100644 index 000000000..17707f3f7 --- /dev/null +++ b/priv/repo/migrations/20221020135943_add_nodeinfo.exs @@ -0,0 +1,17 @@ +defmodule Pleroma.Repo.Migrations.AddNodeinfo do + use Ecto.Migration + + def up do + alter table(:instances) do + add_if_not_exists(:nodeinfo, :map, default: %{}) + add_if_not_exists(:metadata_updated_at, :naive_datetime) + end + end + + def down do + alter table(:instances) do + remove_if_exists(:nodeinfo, :map) + remove_if_exists(:metadata_updated_at, :naive_datetime) + end + end +end diff --git a/test/pleroma/instances/instance_test.exs b/test/pleroma/instances/instance_test.exs index e49922724..adc847da5 100644 --- a/test/pleroma/instances/instance_test.exs +++ b/test/pleroma/instances/instance_test.exs @@ -9,12 +9,16 @@ defmodule Pleroma.Instances.InstanceTest do alias Pleroma.Tests.ObanHelpers alias Pleroma.Web.CommonAPI - use Pleroma.DataCase + use Pleroma.DataCase, async: true import ExUnit.CaptureLog import Pleroma.Factory - setup_all do: clear_config([:instance, :federation_reachability_timeout_days], 1) + setup_all do + clear_config([:instance, :federation_reachability_timeout_days], 1) + clear_config([:instances_nodeinfo, :enabled], true) + clear_config([:instances_favicons, :enabled], true) + end 
describe "set_reachable/1" do test "clears `unreachable_since` of existing matching Instance record having non-nil `unreachable_since`" do @@ -102,62 +106,220 @@ defmodule Pleroma.Instances.InstanceTest do end end - describe "get_or_update_favicon/1" do - test "Scrapes favicon URLs" do - Tesla.Mock.mock(fn %{url: "https://favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: ~s[
] - } + describe "update_metadata/1" do + test "Scrapes favicon URLs and nodeinfo" do + Tesla.Mock.mock(fn + %{url: "https://favicon.example.org/"} -> + %Tesla.Env{ + status: 200, + body: ~s[] + } + + %{url: "https://favicon.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: + Jason.encode!(%{ + links: [ + %{ + rel: "http://nodeinfo.diaspora.software/ns/schema/2.0", + href: "https://favicon.example.org/nodeinfo/2.0" + } + ] + }) + } + + %{url: "https://favicon.example.org/nodeinfo/2.0"} -> + %Tesla.Env{ + status: 200, + body: Jason.encode!(%{version: "2.0", software: %{name: "Akkoma"}}) + } end) - assert "https://favicon.example.org/favicon.png" == - Instance.get_or_update_favicon(URI.parse("https://favicon.example.org/")) + assert {:ok, true} == + Instance.update_metadata(URI.parse("https://favicon.example.org/")) + + {:ok, instance} = Instance.get_cached_by_url("https://favicon.example.org/") + assert instance.favicon == "https://favicon.example.org/favicon.png" + assert instance.nodeinfo == %{"version" => "2.0", "software" => %{"name" => "Akkoma"}} end - test "Returns nil on too long favicon URLs" do + test "Does not retain favicons that are too long" do long_favicon_url = "https://Lorem.ipsum.dolor.sit.amet/consecteturadipiscingelit/Praesentpharetrapurusutaliquamtempus/Mauriseulaoreetarcu/atfacilisisorci/Nullamporttitor/nequesedfeugiatmollis/dolormagnaefficiturlorem/nonpretiumsapienorcieurisus/Nullamveleratsem/Maecenassedaccumsanexnam/favicon.png" - Tesla.Mock.mock(fn %{url: "https://long-favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: - ~s[] - } + Tesla.Mock.mock(fn + %{url: "https://long-favicon.example.org/"} -> + %Tesla.Env{ + status: 200, + body: + ~s[] + } + + %{url: "https://long-favicon.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: + Jason.encode!(%{ + links: [ + %{ + rel: "http://nodeinfo.diaspora.software/ns/schema/2.0", + href: "https://long-favicon.example.org/nodeinfo/2.0" + } + ] 
+ }) + } + + %{url: "https://long-favicon.example.org/nodeinfo/2.0"} -> + %Tesla.Env{ + status: 200, + body: Jason.encode!(%{version: "2.0", software: %{name: "Akkoma"}}) + } end) - assert capture_log(fn -> - assert nil == - Instance.get_or_update_favicon( - URI.parse("https://long-favicon.example.org/") - ) - end) =~ - "Instance.get_or_update_favicon(\"long-favicon.example.org\") error: %Postgrex.Error{" + assert {:ok, true} == + Instance.update_metadata(URI.parse("https://long-favicon.example.org/")) + + {:ok, instance} = Instance.get_cached_by_url("https://long-favicon.example.org/") + assert instance.favicon == nil end test "Handles not getting a favicon URL properly" do - Tesla.Mock.mock(fn %{url: "https://no-favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: ~s[