2019-07-09 23:13:23 -06:00
|
|
|
# Pleroma: A lightweight social networking server
|
2021-01-12 23:49:20 -07:00
|
|
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
2019-07-09 23:13:23 -06:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
2018-12-01 15:53:10 -07:00
|
|
|
defmodule Pleroma.Object.Fetcher do
|
2019-05-24 22:24:21 -06:00
|
|
|
alias Pleroma.HTTP
|
2022-11-15 10:23:47 -07:00
|
|
|
alias Pleroma.Instances
|
2020-09-15 09:22:08 -06:00
|
|
|
alias Pleroma.Maps
|
2019-04-17 03:22:32 -06:00
|
|
|
alias Pleroma.Object
|
2018-12-01 15:53:10 -07:00
|
|
|
alias Pleroma.Object.Containment
|
2019-09-18 09:13:21 -06:00
|
|
|
alias Pleroma.Repo
|
2019-07-17 16:41:42 -06:00
|
|
|
alias Pleroma.Signature
|
|
|
|
alias Pleroma.Web.ActivityPub.InternalFetchActor
|
2020-06-14 14:01:14 -06:00
|
|
|
alias Pleroma.Web.ActivityPub.ObjectValidator
|
2018-12-01 15:53:10 -07:00
|
|
|
alias Pleroma.Web.ActivityPub.Transmogrifier
|
2020-02-15 10:41:38 -07:00
|
|
|
alias Pleroma.Web.Federator
|
2018-12-01 15:53:10 -07:00
|
|
|
|
|
|
|
require Logger
|
2019-09-18 10:53:51 -06:00
|
|
|
require Pleroma.Constants
|
2018-12-01 15:53:10 -07:00
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@moduledoc """
|
|
|
|
This module deals with correctly fetching ActivityPub objects in a safe way.
|
|
|
|
|
|
|
|
The core function is `fetch_and_contain_remote_object_from_id/1` which performs
|
|
|
|
the actual fetch and common safety and authenticity checks. Other `fetch_*`
|
|
|
|
functions use the former and perform some additional tasks.
|
|
|
|
"""
|
|
|
|
|
2024-03-13 15:12:17 -06:00
|
|
|
@mix_env Mix.env()
|
|
|
|
|
2019-09-18 10:07:25 -06:00
|
|
|
# Forces `:updated_at` on the changeset to the current UTC time, truncated to
# whole seconds, so the record is marked as freshly updated.
defp touch_changeset(changeset) do
  now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)

  Ecto.Changeset.put_change(changeset, :updated_at, now)
end
|
|
|
|
|
2020-06-14 14:24:00 -06:00
|
|
|
# Carries locally maintained state from the stored object over to freshly
# fetched `new_data`.
#
# * Edit history: if the remote data ships its own `formerRepresentations`, it
#   is treated as the only source of truth. Otherwise any history we already
#   hold is re-attached and `Object.Updater.maybe_update_history/3` is asked to
#   maintain it, flagged as updated when any updatable status field differs.
# * Internal fields: the keys listed in
#   `Pleroma.Constants.object_internal_fields/0` are copied from the old data
#   and always win over whatever the remote sent.
#
# Anything that does not carry a `data` map is passed through untouched.
defp maybe_reinject_internal_fields(%{data: %{} = old_data}, new_data) do
  has_history? = fn data ->
    match?(
      %{"formerRepresentations" => %{"orderedItems" => items}} when is_list(items),
      data
    )
  end

  merged =
    if has_history?.(new_data) do
      # The remote provides history information; nothing to manage on our side.
      new_data
    else
      # The remote does not have history information, so we manage it ourselves,
      # starting from whatever history we already stored.
      with_local_history =
        if has_history?.(old_data) do
          Map.put(new_data, "formerRepresentations", old_data["formerRepresentations"])
        else
          new_data
        end

      changed? =
        Enum.any?(Pleroma.Constants.status_updatable_fields(), fn field ->
          Map.get(old_data, field) != Map.get(with_local_history, field)
        end)

      %{updated_object: updated} =
        Object.Updater.maybe_update_history(with_local_history, old_data,
          updated: changed?,
          use_history_in_new_object?: false
        )

      updated
    end

  preserved = Map.take(old_data, Pleroma.Constants.object_internal_fields())

  Map.merge(merged, preserved)
end

defp maybe_reinject_internal_fields(_, new_data), do: new_data
|
2019-09-18 10:53:51 -06:00
|
|
|
|
2019-09-18 22:35:34 -06:00
|
|
|
# Merges freshly fetched `new_data` into `object`, stores the result and
# refreshes the object cache.
#
# "Question" objects are run through `ObjectValidator.validate/2`; every other
# type is first passed through `Transmogrifier.fix_object/1`. Any step that
# fails is logged and handed back wrapped as `{:error, failure}`.
@spec reinject_object(struct(), map()) :: {:ok, Object.t()} | {:error, any()}
defp reinject_object(%Object{data: %{"type" => "Question"}} = object, new_data) do
  Logger.debug("Reinjecting object #{new_data["id"]}")

  merged = maybe_reinject_internal_fields(object, new_data)

  with {:ok, validated, _meta} <- ObjectValidator.validate(merged, %{}),
       changeset = object |> Object.change(%{data: validated}) |> touch_changeset(),
       {:ok, stored} <- Repo.insert_or_update(changeset),
       {:ok, cached} <- Object.set_cache(stored) do
    {:ok, cached}
  else
    failure ->
      Logger.error("Error while processing object: #{inspect(failure)}")
      {:error, failure}
  end
end

defp reinject_object(%Object{} = object, new_data) do
  Logger.debug("Reinjecting object #{new_data["id"]}")

  merged = maybe_reinject_internal_fields(object, Transmogrifier.fix_object(new_data))
  changeset = object |> Object.change(%{data: merged}) |> touch_changeset()

  with {:ok, stored} <- Repo.insert_or_update(changeset),
       {:ok, cached} <- Object.set_cache(stored) do
    {:ok, cached}
  else
    failure ->
      Logger.error("Error while processing object: #{inspect(failure)}")
      {:error, failure}
  end
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Refetches an object that is already in our database from its remote origin and
updates the stored copy (e.g. for polls).

Local objects are returned unchanged without any fetch. Whatever a failing step
returns is handed back wrapped in an `{:error, _}` tuple.
"""
def refetch_object(%Object{data: %{"id" => id}} = object) do
  case Object.local?(object) do
    true ->
      {:ok, object}

    false ->
      with {:ok, new_data} <- fetch_and_contain_remote_object_from_id(id),
           {:ok, refreshed} <- reinject_object(object, new_data) do
        {:ok, refreshed}
      else
        failure -> {:error, failure}
      end

    other ->
      {:error, {:local, other}}
  end
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Fetches a new object and puts it through the processing pipeline for inbound objects.

An object that is already known (found in the cache by its AP id, or found when
normalizing the fetched data) is returned directly.

Note: this also wraps the fetched data in a fake Create activity, since at the
moment everything internally needs to be traceable back to a Create activity.
"""
def fetch_object_from_id(id, options \\ []) do
  with %URI{} = parsed <- URI.parse(id),
       # Make sure the URI is at least vaguely valid before doing anything else
       {:scheme, true} <- {:scheme, parsed.scheme in ["http", "https"]},
       # Instance restrictions are applied here so nothing is fetched from unwanted instances
       {:ok, nil} <- Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_reject(parsed),
       {:ok, _} <- Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(parsed),
       {_, nil} <- {:fetch_object, Object.get_cached_by_ap_id(id)},
       {_, true} <- {:allowed_depth, Federator.allowed_thread_distance?(options[:depth])},
       {_, {:ok, fetched}} <- {:fetch, fetch_and_contain_remote_object_from_id(id)},
       {_, nil} <- {:normalize, Object.normalize(fetched, fetch: false)},
       create_params = prepare_activity_params(fetched),
       {_, {:ok, activity}} <-
         {:transmogrifier, Transmogrifier.handle_incoming(create_params, options)},
       {_, _, %Object{} = object} <-
         {:object, fetched, Object.normalize(activity, fetch: false)} do
    {:ok, object}
  else
    {:scheme, false} ->
      {:error, "URI Scheme Invalid"}

    # Already cached under this id: nothing to fetch
    {:fetch_object, %Object{} = known} ->
      {:ok, known}

    {:allowed_depth, false} ->
      {:error, "Max thread distance exceeded."}

    {:fetch, {:error, reason}} ->
      {:error, reason}

    # The fetched data resolves to an object we already have
    {:normalize, %Object{} = known} ->
      {:ok, known}

    # The order of the three :transmogrifier clauses matters: rejections first,
    # then the catch-all for every other pipeline failure.
    {:transmogrifier, {:error, {:reject, reason}}} ->
      {:reject, reason}

    {:transmogrifier, {:reject, reason}} ->
      {:reject, reason}

    {:transmogrifier, _} = failure ->
      {:error, failure}

    # The activity was handled but yields no object: store the fetched data directly
    {:object, fetched, nil} ->
      reinject_object(%Object{}, fetched)

    {:reject, reason} ->
      {:reject, reason}

    other ->
      other
  end
end
|
|
|
|
|
|
|
|
# Wraps fetched object `data` in the params of a synthetic "Create" activity.
# The addressing fields ("to", "cc", "bto", "bcc") are copied from the object
# through `Maps.put_if_present/3`.
defp prepare_activity_params(data) do
  create = %{
    "type" => "Create",
    # Should we seriously keep this attributedTo thing?
    "actor" => data["actor"] || data["attributedTo"],
    "object" => data
  }

  Enum.reduce(["to", "cc", "bto", "bcc"], create, fn field, params ->
    Maps.put_if_present(params, field, data[field])
  end)
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Identical to `fetch_object_from_id/2`, but returns the object directly, or `nil`
on any failure.

Rejections are logged at debug level and unexpected failures at error level;
mock transport errors and deleted objects yield `nil` without logging.
"""
def fetch_object_from_id!(id, options \\ []) do
  case fetch_object_from_id(id, options) do
    {:ok, object} ->
      object

    {:error, %Tesla.Mock.Error{}} ->
      nil

    {:error, {"Object has been deleted", _id, _code}} ->
      nil

    {:reject, reason} ->
      Logger.debug("Rejected #{id} while fetching: #{inspect(reason)}")
      nil

    failure ->
      Logger.error("Error while fetching #{id}: #{inspect(failure)}")
      nil
  end
end
|
|
|
|
|
2019-07-17 16:41:42 -06:00
|
|
|
# Builds the `{"signature", value}` request header for a signed GET of `id`,
# signing as the internal fetch actor over the request target, host and date.
#
# `date` must be the same value that is sent in the request's `date` header.
defp make_signature(id, date) do
  uri = URI.parse(id)

  # The signed target has to mirror the request line that is actually sent:
  # a URI without a path is requested as "/", and a query string is part of
  # the target. Signing only `uri.path` yields "get " for path-less URIs and
  # drops the query, so the receiver's recomputed signing string differs.
  path = if uri.path in [nil, ""], do: "/", else: uri.path

  request_target =
    case uri.query do
      nil -> path
      query -> path <> "?" <> query
    end

  signature =
    InternalFetchActor.get_actor()
    |> Signature.sign(%{
      "(request-target)": "get #{request_target}",
      host: uri.host,
      date: date
    })

  {"signature", signature}
end
|
|
|
|
|
|
|
|
# Prepends a signature header to `headers` when signing of object fetches is
# enabled in the `[:activitypub, :sign_object_fetches]` config; otherwise the
# headers are returned unchanged.
defp sign_fetch(headers, id, date) do
  case Pleroma.Config.get([:activitypub, :sign_object_fetches]) do
    setting when setting in [nil, false] -> headers
    _enabled -> [make_signature(id, date) | headers]
  end
end
|
|
|
|
|
|
|
|
# Prepends a `date` header to `headers`, but only when object fetches are
# signed (`[:activitypub, :sign_object_fetches]`); otherwise the headers are
# returned unchanged.
defp maybe_date_fetch(headers, date) do
  signing_enabled? = Pleroma.Config.get([:activitypub, :sign_object_fetches])

  if signing_enabled?, do: [{"date", date} | headers], else: headers
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Fetches an arbitrary remote object and performs basic safety and authenticity checks.

Accepts the object id as a string, or a map carrying it under the `"id"` key.

Returns `{:ok, data}` with the decoded JSON document once it has passed the
containment checks, and `{:error, reason}` otherwise. Only `http://` and
`https://` ids are fetched.
"""
def fetch_and_contain_remote_object_from_id(id)

def fetch_and_contain_remote_object_from_id(%{"id" => id}),
  do: fetch_and_contain_remote_object_from_id(id)

def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
  Logger.debug("Fetching object #{id} via AP")

  # Match the full scheme prefix: a bare "http" prefix would also let through
  # ids such as "httpfoo://..." that are not http(s) URLs at all.
  with {:scheme, true} <- {:scheme, String.starts_with?(id, ["http://", "https://"])},
       {_, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)},
       {:ok, body} <- get_object(id),
       {:ok, data} <- safe_json_decode(body),
       {_, :ok} <- {:containment, Containment.contain_origin_from_id(id, data)},
       {_, :ok} <- {:containment, Containment.contain_origin(id, data)} do
    # A successful fetch proves the instance is reachable again
    if !Instances.reachable?(id) do
      Instances.set_reachable(id)
    end

    {:ok, data}
  else
    {:scheme, _} ->
      {:error, "Unsupported URI scheme"}

    {:local_fetch, _} ->
      {:error, "Trying to fetch local resource"}

    {:containment, _} ->
      {:error, "Object containment failed."}

    {:error, e} ->
      {:error, e}

    e ->
      {:error, e}
  end
end

def fetch_and_contain_remote_object_from_id(_id),
  do: {:error, "id must be a string"}
|
|
|
|
|
2024-03-13 15:12:17 -06:00
|
|
|
# Returns `{:cross_domain_redirect, boolean}`: `true` when the host that finally
# answered differs from the host of the originally requested URL.
defp check_crossdomain_redirect(final_host, original_url)

# HOPEFULLY TEMPORARY
# Basically none of our Tesla mocks in tests set the url parameter on their
# responses (which is supposed to exist for Tesla proper); without this
# clause almost every fetch in test would fail.
if @mix_env == :test do
  defp check_crossdomain_redirect(nil, _original_url), do: {:cross_domain_redirect, false}
end

defp check_crossdomain_redirect(final_host, original_url) do
  %URI{host: requested_host} = URI.parse(original_url)

  {:cross_domain_redirect, final_host != requested_host}
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Do NOT use; only public for use in tests.

Performs the HTTP GET for `id` (dated and signed when enabled) and returns
`{:ok, body}` only for a 2xx response that did not end up on a different host
and that carries an ActivityPub content type. Everything else is returned as
`{:error, reason}`.
"""
def get_object(id) do
  date = Pleroma.Signature.signed_date()

  request_headers =
    [{"accept", "application/activity+json"}]
    |> maybe_date_fetch(date)
    |> sign_fetch(id, date)

  with {:ok, %{body: body, status: code, headers: response_headers, url: final_url}}
       when code in 200..299 <- HTTP.get(id, request_headers),
       remote_host = URI.parse(final_url).host,
       {:cross_domain_redirect, false} <- check_crossdomain_redirect(remote_host, id),
       {:has_content_type, {_, content_type}} <-
         {:has_content_type, List.keyfind(response_headers, "content-type", 0)},
       {:parse_content_type, {:ok, "application", subtype, type_params}} <-
         {:parse_content_type, Plug.Conn.Utils.media_type(content_type)} do
    case {subtype, type_params} do
      {"activity+json", _} ->
        {:ok, body}

      # pixelfed sometimes (and only sometimes) responds with the http variant
      # of the profile instead of https, so both are accepted
      {"ld+json", %{"profile" => profile}}
      when profile in [
             "https://www.w3.org/ns/activitystreams",
             "http://www.w3.org/ns/activitystreams"
           ] ->
        {:ok, body}

      _ ->
        {:error, {:content_type, content_type}}
    end
  else
    {:ok, %{status: code}} when code in [404, 410] ->
      {:error, {"Object has been deleted", id, code}}

    {:error, reason} ->
      {:error, reason}

    {:has_content_type, _} ->
      {:error, {:content_type, nil}}

    {:parse_content_type, unparsed} ->
      {:error, {:content_type, unparsed}}

    other ->
      {:error, other}
  end
end
|
2019-07-13 10:17:57 -06:00
|
|
|
|
2020-09-18 05:58:22 -06:00
|
|
|
# Decodes a JSON body with `Jason.decode/1`; a missing (`nil`) body decodes to
# `{:ok, nil}` instead of being handed to the decoder.
defp safe_json_decode(json) do
  case json do
    nil -> {:ok, nil}
    _ -> Jason.decode(json)
  end
end
|
2018-12-01 15:53:10 -07:00
|
|
|
end
|