diff --git a/CHANGELOG.md b/CHANGELOG.md index b69cd6734..569c6b616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Fixed - Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results +## Changed +- Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated. + ## 2024.04.1 (Security) ## Fixed diff --git a/changelog.d/card-endpoint.remove b/changelog.d/card-endpoint.remove deleted file mode 100644 index e09a24cf7..000000000 --- a/changelog.d/card-endpoint.remove +++ /dev/null @@ -1 +0,0 @@ -Mastodon API: Remove deprecated GET /api/v1/statuses/:id/card endpoint https://github.com/mastodon/mastodon/pull/11213 diff --git a/changelog.d/rich_media_refactor.change b/changelog.d/rich_media_refactor.change deleted file mode 100644 index c0d4e3b0a..000000000 --- a/changelog.d/rich_media_refactor.change +++ /dev/null @@ -1 +0,0 @@ -Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated. diff --git a/lib/pleroma/activity/html.ex b/lib/pleroma/activity/html.ex index e4aaad523..92840b1c0 100644 --- a/lib/pleroma/activity/html.ex +++ b/lib/pleroma/activity/html.ex @@ -28,7 +28,7 @@ defmodule Pleroma.Activity.HTML do end end - defp add_cache_key_for(activity_id, additional_key) do + def add_cache_key_for(activity_id, additional_key) do current = get_cache_keys_for(activity_id) unless additional_key in current do diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 4972fb26c..01a5f8585 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -6,8 +6,6 @@ defmodule Pleroma.HTML do # Scrubbers are compiled on boot so they can be configured in OTP releases # @on_load :compile_scrubbers - @cachex Pleroma.Config.get([:cachex, :provider], Cachex) - def compile_scrubbers do dir = Path.join(:code.priv_dir(:pleroma), "scrubbers") diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 982c5b137..1055fb4bc 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -294,18 +294,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do # Here the implicit index of the current content is 0 chrono_order = history_len - 1 - content = - object - |> render_content() - - quote_post = - if visible_for_user?(quote_activity, opts[:for]) and opts[:show_quote] != false do - quote_rendering_opts = Map.merge(opts, %{activity: quote_activity, show_quote: false}) - render("show.json", quote_rendering_opts) - else - nil - end - content = object |> render_content() @@ -318,64 +306,12 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do "mastoapi:content:#{chrono_order}" ) - content_plaintext = - content - |> Activity.HTML.get_cached_stripped_html_for_activity( - activity, - "mastoapi:content:#{chrono_order}" - ) - - summary = object.data["summary"] || "" - card = case Card.get_by_activity(activity) do %Card{} = result -> render("card.json", result) _ -> nil end - url = - if user.local do - Pleroma.Web.Router.Helpers.o_status_url(Pleroma.Web.Endpoint, :notice, activity) - else - object.data["url"] || object.data["external_url"] || object.data["id"] - end - - direct_conversation_id = - with {_, nil} <- {:direct_conversation_id, opts[:direct_conversation_id]}, - {_, true} <- {:include_id, opts[:with_direct_conversation_id]}, - {_, %User{} = for_user} <- {:for_user, opts[:for]} do - Activity.direct_conversation_id(activity, for_user) - else - {:direct_conversation_id, participation_id} when is_integer(participation_id) -> - participation_id - - _e -> - nil - end - - emoji_reactions = - object - |> Object.get_emoji_reactions() - |> EmojiReactionController.filter_allowed_users( - opts[:for], - Map.get(opts, :with_muted, false) - ) - |> Stream.map(fn {emoji, users, url} -> - build_emoji_map(emoji, users, url, opts[:for]) - end) - |> Enum.to_list() - - # Status muted state (would do 1 request per status unless user mutes are preloaded) - muted = - thread_muted? || - UserRelationship.exists?( - get_in(opts, [:relationships, :user_relationships]), - :mute, - opts[:for], - user, - fn for_user, user -> User.mutes?(for_user, user) end - ) - content_plaintext = content |> Activity.HTML.get_cached_stripped_html_for_activity( @@ -385,8 +321,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do summary = object.data["summary"] || "" - card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)) - url = if user.local do url(~p[/notice/#{activity}]) diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 0fb09f5a8..7f6b5d388 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2021 Pleroma Authors +# Copyright © 2017-2022 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parser do @@ -15,10 +15,14 @@ defmodule Pleroma.Web.RichMedia.Parser do @spec parse(String.t()) :: {:ok, map()} | {:error, any()} def parse(url) do - with :ok <- validate_page_url(url), + with {_, true} <- {:config, @config_impl.get([:rich_media, :enabled])}, + :ok <- validate_page_url(url), {:ok, data} <- parse_url(url) do data = Map.put(data, "url", url) {:ok, data} + else + {:config, _} -> {:error, :rich_media_disabled} + e -> e end end @@ -32,21 +36,6 @@ defmodule Pleroma.Web.RichMedia.Parser do end end - def parse_with_timeout(url) do - try do - task = - Task.Supervisor.async_nolink(Pleroma.TaskSupervisor, fn -> - parse_url(url) - end) - - Task.await(task, 5000) - catch - :exit, {:timeout, _} -> - Logger.warning("Timeout while fetching rich media for #{url}") - {:error, :timeout} - end - end - defp maybe_parse(html) do Enum.reduce_while(parsers(), %{}, fn parser, acc -> case parser.parse(html, acc) do @@ -72,4 +61,46 @@ defmodule Pleroma.Web.RichMedia.Parser do end) |> Map.new() end + + @spec validate_page_url(URI.t() | binary()) :: :ok | :error + defp validate_page_url(page_url) when is_binary(page_url) do + validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) + + page_url + |> Linkify.Parser.url?(validate_tld: validate_tld) + |> parse_uri(page_url) + end + + defp validate_page_url(%URI{host: host, scheme: "https"}) do + cond do + Linkify.Parser.ip?(host) -> + :error + + host in @config_impl.get([:rich_media, :ignore_hosts], []) -> + :error + + get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> + :error + + true -> + :ok + end + end + + defp validate_page_url(_), do: :error + + defp parse_uri(true, url) do + url + |> URI.parse() + |> validate_page_url + end + + defp parse_uri(_, _), do: :error + + defp get_tld(host) do + host + |> String.split(".") + |> Enum.reverse() + |> hd + end end diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex index 695740d2e..0f303176c 100644 --- a/lib/pleroma/web/rich_media/parsers/o_embed.ex +++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex @@ -1,5 +1,5 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2021 Pleroma Authors +# Copyright © 2017-2022 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do diff --git a/mix.exs b/mix.exs index 1aec1a21e..a789cd967 100644 --- a/mix.exs +++ b/mix.exs @@ -157,7 +157,7 @@ defmodule Pleroma.Mixfile do {:floki, "~> 0.34"}, {:timex, "~> 3.7"}, {:ueberauth, "== 0.10.5"}, - {:linkify, git: "https://akkoma.dev/AkkomaGang/linkify.git"}, + {:linkify, "~> 0.5.3"}, {:http_signatures, git: "https://akkoma.dev/AkkomaGang/http_signatures.git", ref: "6640ce7d24c783ac2ef56e27d00d12e8dc85f396"}, diff --git a/mix.lock b/mix.lock index 04dc170ba..ab24b8dbc 100644 --- a/mix.lock +++ b/mix.lock @@ -65,7 +65,7 @@ "joken": {:hex, :joken, "2.6.1", "2ca3d8d7f83bf7196296a3d9b2ecda421a404634bfc618159981a960020480a1", [:mix], [{:jose, "~> 1.11.9", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "ab26122c400b3d254ce7d86ed066d6afad27e70416df947cdcb01e13a7382e68"}, "jose": {:hex, :jose, "1.11.10", "a903f5227417bd2a08c8a00a0cbcc458118be84480955e8d251297a425723f83", [:mix, :rebar3], [], "hexpm", "0d6cd36ff8ba174db29148fc112b5842186b68a90ce9fc2b3ec3afe76593e614"}, "jumper": {:hex, :jumper, "1.0.2", "68cdcd84472a00ac596b4e6459a41b3062d4427cbd4f1e8c8793c5b54f1406a7", [:mix], [], "hexpm", "9b7782409021e01ab3c08270e26f36eb62976a38c1aa64b2eaf6348422f165e1"}, - "linkify": {:git, "https://akkoma.dev/AkkomaGang/linkify.git", "2567e2c1073fa371fd26fd66dfa5bc77b6919c16", []}, + "linkify": {:hex, :linkify, "0.5.3", "5f8143d8f61f5ff08d3aeeff47ef6509492b4948d8f08007fbf66e4d2246a7f2", [:mix], [], "hexpm", "3ef35a1377d47c25506e07c1c005ea9d38d700699d92ee92825f024434258177"}, "mail": {:hex, :mail, "0.3.1", "cb0a14e4ed8904e4e5a08214e686ccf6f9099346885db17d8c309381f865cc5c", [:mix], [], "hexpm", "1db701e89865c1d5fa296b2b57b1cd587587cca8d8a1a22892b35ef5a8e352a6"}, "majic": {:git, "https://akkoma.dev/AkkomaGang/majic.git", "80540b36939ec83f48e76c61e5000e0fd67706f0", [ref: "80540b36939ec83f48e76c61e5000e0fd67706f0"]}, "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, diff --git a/test/fixtures/rich_media/google.html b/test/fixtures/rich_media/google.html new file mode 100644 index 000000000..c068397a5 --- /dev/null +++ b/test/fixtures/rich_media/google.html @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/test/fixtures/rich_media/yahoo.html b/test/fixtures/rich_media/yahoo.html new file mode 100644 index 000000000..41d8c5cd9 --- /dev/null +++ b/test/fixtures/rich_media/yahoo.html @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index f28bef51c..ba7d284d5 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -398,6 +398,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do ["push_subscriptions"], ["registrations"], ["report_notes"], + ["rich_media_card"], ["scheduled_activities"], ["schema_migrations"], ["thread_mutes"], diff --git a/test/pleroma/web/mastodon_api/views/status_view_test.exs b/test/pleroma/web/mastodon_api/views/status_view_test.exs index ce251a7ba..7db3e3e61 100644 --- a/test/pleroma/web/mastodon_api/views/status_view_test.exs +++ b/test/pleroma/web/mastodon_api/views/status_view_test.exs @@ -17,10 +17,12 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.RichMedia.Card + alias Pleroma.UnstubbedConfigMock, as: ConfigMock import Pleroma.Factory import Tesla.Mock import OpenApiSpex.TestAssertions + import Mox setup do mock(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 40d00a170..a5f2563a2 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -1,96 +1,30 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2021 Pleroma Authors +# Copyright © 2017-2022 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.ParserTest do use Pleroma.DataCase + alias Pleroma.Web.RichMedia.Parser + import Tesla.Mock + setup do - Tesla.Mock.mock_global(fn - %{ - method: :get, - url: "http://example.com/ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} - - %{ - method: :get, - url: "http://example.com/non-ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")} - - %{ - method: :get, - url: "http://example.com/ogp-missing-title" - } -> - %Tesla.Env{ - status: 200, - body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") - } - - %{ - method: :get, - url: "http://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - - %{ - method: :get, - url: "http://example.com/oembed" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")} - - %{ - method: :get, - url: "http://example.com/oembed.json" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")} - - %{method: :get, url: "http://example.com/empty"} -> - %Tesla.Env{status: 200, body: "hello"} - - %{method: :get, url: "http://example.com/malformed"} -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")} - - %{method: :get, url: "http://example.com/error"} -> - {:error, :overload} - - %{ - method: :head, - url: "http://example.com/huge-page" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] - } - - %{ - method: :head, - url: "http://example.com/pdf-file" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] - } - - %{method: :head} -> - %Tesla.Env{status: 404, body: "", headers: []} - end) - - :ok + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) end + setup_all do: clear_config([:rich_media, :enabled], true) + test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + assert {:error, _} = Parser.parse("https://example.com/empty") end test "doesn't just add a title" do - assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp") + assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp") end test "parses ogp" do - assert Parser.parse("http://example.com/ogp") == + assert Parser.parse("https://example.com/ogp") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -98,12 +32,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp" + "url" => "https://example.com/ogp" }} end test "falls back to when ogp:title is missing" do - assert Parser.parse("http://example.com/ogp-missing-title") == + assert Parser.parse("https://example.com/ogp-missing-title") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -111,12 +45,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp-missing-title" + "url" => "https://example.com/ogp-missing-title" }} end test "parses twitter card" do - assert Parser.parse("http://example.com/twitter-card") == + assert Parser.parse("https://example.com/twitter-card") == {:ok, %{ "card" => "summary", @@ -124,12 +58,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission", "description" => "View the album on Flickr.", - "url" => "http://example.com/twitter-card" + "url" => "https://example.com/twitter-card" }} end test "parses OEmbed and filters HTML tags" do - assert Parser.parse("http://example.com/oembed") == + assert Parser.parse("https://example.com/oembed") == {:ok, %{ "author_name" => "\u202E\u202D\u202Cbees\u202C", @@ -149,7 +83,7 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "thumbnail_width" => 150, "title" => "Bacon Lollys", "type" => "photo", - "url" => "http://example.com/oembed", + "url" => "https://example.com/oembed", "version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc", @@ -158,19 +92,19 @@ defmodule Pleroma.Web.RichMedia.ParserTest do end test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + assert {:error, _} = Parser.parse("https://example.com/malformed") end test "returns error if getting page was not successful" do - assert {:error, :overload} = Parser.parse("http://example.com/error") + assert {:error, :overload} = Parser.parse("https://example.com/error") end test "does a HEAD request to check if the body is too large" do - assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page") + assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page") end test "does a HEAD request to check if the body is html" do - assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file") + assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file") end test "refuses to crawl incomplete URLs" do @@ -195,4 +129,10 @@ defmodule Pleroma.Web.RichMedia.ParserTest do assert :error == Parser.parse(url) end) end + + test "returns error when disabled" do + clear_config([:rich_media, :enabled], false) + + assert match?({:error, :rich_media_disabled}, Parser.parse("https://example.com/ogp")) + end end diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index 7bfcfaa0e..6a01393e3 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -1708,14 +1708,41 @@ defmodule HttpRequestMock do # Most of the rich media mocks are missing HEAD requests, so we just return 404. @rich_media_mocks [ + "https://example.com/empty", + "https://example.com/error", + "https://example.com/malformed", + "https://example.com/non-ogp", + "https://example.com/oembed", + "https://example.com/oembed.json", "https://example.com/ogp", "https://example.com/ogp-missing-data", - "https://example.com/twitter-card" + "https://example.com/ogp-missing-title", + "https://example.com/twitter-card", + "https://google.com/", + "https://pleroma.local/notice/9kCP7V", + "https://yahoo.com/" ] + def head(url, _query, _body, _headers) when url in @rich_media_mocks do {:ok, %Tesla.Env{status: 404, body: ""}} end + def head("https://example.com/pdf-file", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] + }} + end + + def head("https://example.com/huge-page", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] + }} + end + def head(url, query, body, headers) do {:error, "Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} diff --git a/test/support/mocks.ex b/test/support/mocks.ex index fd8f825b3..cc34b21a5 100644 --- a/test/support/mocks.ex +++ b/test/support/mocks.ex @@ -26,5 +26,6 @@ Mox.defmock(Pleroma.Web.ActivityPub.SideEffectsMock, Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing) Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting) +Mox.defmock(Pleroma.UnstubbedConfigMock, for: Pleroma.Config.Getting) Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging) diff --git a/test/test_helper.exs b/test/test_helper.exs index 22a0f33ee..dafa45099 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -17,3 +17,16 @@ ExUnit.after_suite(fn _results -> uploads = Pleroma.Config.get([Pleroma.Uploaders.Local, :uploads], "test/uploads") File.rm_rf!(uploads) end) + +defmodule Pleroma.Test.StaticConfig do + @moduledoc """ + This module provides a Config that is completely static, built at startup time from the environment. It's safe to use in testing as it will not modify any state. + """ + + @behaviour Pleroma.Config.Getting + @config Application.get_all_env(:pleroma) + + def get(path, default \\ nil) do + get_in(@config, path) || default + end +end