2019-07-09 23:13:23 -06:00
|
|
|
# Pleroma: A lightweight social networking server
|
2021-01-12 23:49:20 -07:00
|
|
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
2019-07-09 23:13:23 -06:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
2018-12-01 15:53:10 -07:00
|
|
|
defmodule Pleroma.Object.Fetcher do
|
2019-05-24 22:24:21 -06:00
|
|
|
alias Pleroma.HTTP
|
2022-11-15 10:23:47 -07:00
|
|
|
alias Pleroma.Instances
|
2020-09-15 09:22:08 -06:00
|
|
|
alias Pleroma.Maps
|
2019-04-17 03:22:32 -06:00
|
|
|
alias Pleroma.Object
|
2018-12-01 15:53:10 -07:00
|
|
|
alias Pleroma.Object.Containment
|
2019-09-18 09:13:21 -06:00
|
|
|
alias Pleroma.Repo
|
2019-07-17 16:41:42 -06:00
|
|
|
alias Pleroma.Signature
|
|
|
|
alias Pleroma.Web.ActivityPub.InternalFetchActor
|
2020-06-14 14:01:14 -06:00
|
|
|
alias Pleroma.Web.ActivityPub.ObjectValidator
|
2018-12-01 15:53:10 -07:00
|
|
|
alias Pleroma.Web.ActivityPub.Transmogrifier
|
2020-02-15 10:41:38 -07:00
|
|
|
alias Pleroma.Web.Federator
|
2018-12-01 15:53:10 -07:00
|
|
|
|
|
|
|
require Logger
|
2019-09-18 10:53:51 -06:00
|
|
|
require Pleroma.Constants
|
2018-12-01 15:53:10 -07:00
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@moduledoc """
|
|
|
|
This module deals with correctly fetching ActivityPub objects in a safe way.
|
|
|
|
|
|
|
|
The core function is `fetch_and_contain_remote_object_from_id/1` which performs
|
|
|
|
the actual fetch and common safety and authenticity checks. Other `fetch_*`
|
|
|
|
functions use the former and perform some additional tasks.
|
|
|
|
"""
|
|
|
|
|
2024-03-13 15:12:17 -06:00
|
|
|
@mix_env Mix.env()
|
|
|
|
|
2019-09-18 10:07:25 -06:00
|
|
|
# Puts the current UTC time, truncated to whole seconds, into the
# changeset as `:updated_at`.
defp touch_changeset(changeset) do
  now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)

  Ecto.Changeset.put_change(changeset, :updated_at, now)
end
|
|
|
|
|
2020-06-14 14:24:00 -06:00
|
|
|
# Carries locally managed state of an already stored object over into freshly
# fetched data.
#
# * Internal fields (`Pleroma.Constants.object_internal_fields/0`) of the old
#   object always win over whatever the remote sent.
# * Edit history ("formerRepresentations"): if the remote document ships its own
#   history it is used untouched; otherwise the locally known history is kept
#   and handed to `Object.Updater.maybe_update_history/3` for maintenance.
#
# When the first argument has no map under `:data`, `new_data` is returned as is.
defp maybe_reinject_internal_fields(%{data: %{} = old_data}, new_data) do
  history? = fn data ->
    match?(%{"formerRepresentations" => %{"orderedItems" => items}} when is_list(items), data)
  end

  preserved = Map.take(old_data, Pleroma.Constants.object_internal_fields())

  merged =
    if history?.(new_data) do
      # If the remote history exists, we treat that as the only source of truth.
      new_data
    else
      # The remote does not provide history information, so we manage it ourselves.
      with_local_history =
        if history?.(old_data) do
          Map.put(new_data, "formerRepresentations", old_data["formerRepresentations"])
        else
          new_data
        end

      changed? =
        Enum.any?(Pleroma.Constants.status_updatable_fields(), fn field ->
          Map.get(old_data, field) != Map.get(with_local_history, field)
        end)

      %{updated_object: updated} =
        Object.Updater.maybe_update_history(with_local_history, old_data,
          updated: changed?,
          use_history_in_new_object?: false
        )

      updated
    end

  Map.merge(merged, preserved)
end

defp maybe_reinject_internal_fields(_, new_data), do: new_data
|
2019-09-18 10:53:51 -06:00
|
|
|
|
2019-09-18 22:35:34 -06:00
|
|
|
# Writes freshly fetched `new_data` into `object` and refreshes the object cache.
#
# "Question" objects are passed through `ObjectValidator.validate/2`; every
# other type goes through `Transmogrifier.fix_object/1` instead. In both cases
# internal fields of the existing object are re-applied and `updated_at` is
# bumped before persisting. Any failure is logged and returned as `{:error, e}`.
@spec reinject_object(struct(), map()) :: {:ok, Object.t()} | {:error, any()}
defp reinject_object(%Object{data: %{"type" => "Question"}} = object, new_data) do
  Logger.debug("Reinjecting object #{new_data["id"]}")

  merged = maybe_reinject_internal_fields(object, new_data)

  with {:ok, validated, _meta} <- ObjectValidator.validate(merged, %{}),
       {:ok, stored} <-
         object
         |> Object.change(%{data: validated})
         |> touch_changeset()
         |> Repo.insert_or_update(),
       {:ok, cached} <- Object.set_cache(stored) do
    {:ok, cached}
  else
    failure ->
      Logger.error("Error while processing object: #{inspect(failure)}")
      {:error, failure}
  end
end

defp reinject_object(%Object{} = object, new_data) do
  Logger.debug("Reinjecting object #{new_data["id"]}")

  merged = maybe_reinject_internal_fields(object, Transmogrifier.fix_object(new_data))

  changeset =
    object
    |> Object.change(%{data: merged})
    |> touch_changeset()

  with {:ok, stored} <- Repo.insert_or_update(changeset),
       {:ok, cached} <- Object.set_cache(stored) do
    {:ok, cached}
  else
    failure ->
      Logger.error("Error while processing object: #{inspect(failure)}")
      {:error, failure}
  end
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Refetches an object that is already in our database from its remote origin and
stores the updated data (e.g. for polls).

Local objects are returned unchanged. If the fetched document carries a
different `"id"` than the stored object, `{:error, :id_mismatch}` is returned.
Any other failure is wrapped as `{:error, failure}`.
"""
def refetch_object(%Object{data: %{"id" => id}} = object) do
  with {:local, false} <- {:local, Object.local?(object)},
       {:ok, fetched} <- fetch_and_contain_remote_object_from_id(id),
       {:id, true} <- {:id, fetched["id"] == id},
       {:ok, refreshed} <- reinject_object(object, fetched) do
    {:ok, refreshed}
  else
    {:local, true} ->
      {:ok, object}

    {:id, false} ->
      {:error, :id_mismatch}

    failure ->
      {:error, failure}
  end
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Fetches a new object and runs it through the processing pipeline for inbound objects.

An object that is already cached or stored locally is returned directly without
a remote fetch.

Note: this also inserts a fake Create activity, since at the moment we
internally need everything to be traceable back to a Create activity.

`options[:depth]` is checked against the allowed thread distance before fetching.
"""
def fetch_object_from_id(id, options \\ []) do
  alias Pleroma.Web.ActivityPub.MRF.SimplePolicy

  uri = URI.parse(id)

  # Steps, in order: check the URI is even vaguely valid, apply instance
  # restrictions (to prevent fetching from unwanted instances), consult the
  # local cache, check thread depth, fetch, then hand over to the Transmogrifier.
  with {:valid_uri_scheme, true} <- {:valid_uri_scheme, uri.scheme in ["http", "https"]},
       {:mrf_reject_check, {:ok, nil}} <- {:mrf_reject_check, SimplePolicy.check_reject(uri)},
       {:mrf_accept_check, {:ok, _}} <- {:mrf_accept_check, SimplePolicy.check_accept(uri)},
       {_, nil} <- {:fetch_object, Object.get_cached_by_ap_id(id)},
       {_, true} <- {:allowed_depth, Federator.allowed_thread_distance?(options[:depth])},
       {_, {:ok, data}} <- {:fetch, fetch_and_contain_remote_object_from_id(id)},
       {_, nil} <- {:normalize, Object.normalize(data, fetch: false)},
       create_params <- prepare_activity_params(data),
       {_, {:ok, activity}} <-
         {:transmogrifier, Transmogrifier.handle_incoming(create_params, options)},
       {_, _data, %Object{} = fetched} <-
         {:object, data, Object.normalize(activity, fetch: false)} do
    {:ok, fetched}
  else
    {:allowed_depth, false} = failure ->
      log_fetch_error(id, failure)
      {:error, :allowed_depth}

    {:valid_uri_scheme, _} = failure ->
      log_fetch_error(id, failure)
      {:error, :invalid_uri_scheme}

    {:mrf_reject_check, _} = failure ->
      log_fetch_error(id, failure)
      {:reject, :mrf}

    {:mrf_accept_check, _} = failure ->
      log_fetch_error(id, failure)
      {:reject, :mrf}

    {:containment, reason} = failure ->
      log_fetch_error(id, failure)
      {:error, reason}

    {:transmogrifier, {:error, {:reject, reason}}} = failure ->
      log_fetch_error(id, failure)
      {:reject, reason}

    {:transmogrifier, {:reject, reason}} = failure ->
      log_fetch_error(id, failure)
      {:reject, reason}

    {:transmogrifier, reason} = failure ->
      log_fetch_error(id, failure)
      {:error, reason}

    # The activity was handled but no object could be normalized from it:
    # store the fetched data directly.
    {:object, data, nil} ->
      reinject_object(%Object{}, data)

    # The fetched data resolves to an object we already have.
    {:normalize, %Object{} = known} ->
      {:ok, known}

    # Cache hit; nothing was fetched.
    {:fetch_object, %Object{} = known} ->
      {:ok, known}

    {:fetch, {:error, reason}} = failure ->
      log_fetch_error(id, failure)
      {:error, reason}

    failure ->
      log_fetch_error(id, failure)
      {:error, failure}
  end
end
|
|
|
|
|
2023-12-27 18:47:18 -07:00
|
|
|
# Tags the process' Logger metadata with the object id and logs why fetching
# that object was rejected.
defp log_fetch_error(id, reason) do
  Logger.metadata(object: id)

  Logger.error("Object rejected while fetching #{id} #{inspect(reason)}")
end
|
|
|
|
|
2019-09-10 22:23:33 -06:00
|
|
|
# Wraps fetched object data into the params of a Create activity.
# The actor falls back to "attributedTo" when the object has no "actor";
# addressing fields are copied over via `Maps.put_if_present/3`.
defp prepare_activity_params(data) do
  create = %{
    "type" => "Create",
    # Should we seriously keep this attributedTo thing?
    "actor" => data["actor"] || data["attributedTo"],
    "object" => data
  }

  Enum.reduce(["to", "cc", "bto", "bcc"], create, fn field, acc ->
    Maps.put_if_present(acc, field, data[field])
  end)
end
|
|
|
|
|
2019-07-17 16:41:42 -06:00
|
|
|
# Builds the `{"signature", value}` request header for a GET on `id`, signed
# with the internal fetch actor. The signed fields are the request target
# (path of `id` only), the host and the given date.
defp make_signature(id, date) do
  %URI{path: path, host: host} = URI.parse(id)

  signed_fields = %{
    "(request-target)": "get #{path}",
    host: host,
    date: date
  }

  signature = Signature.sign(InternalFetchActor.get_actor(), signed_fields)

  {"signature", signature}
end
|
|
|
|
|
|
|
|
# Prepends a signature header when `[:activitypub, :sign_object_fetches]` is
# enabled in the config; otherwise returns the headers untouched.
defp sign_fetch(headers, id, date) do
  case Pleroma.Config.get([:activitypub, :sign_object_fetches]) do
    disabled when disabled in [nil, false] -> headers
    _enabled -> [make_signature(id, date) | headers]
  end
end
|
|
|
|
|
|
|
|
# Prepends a `date` header when `[:activitypub, :sign_object_fetches]` is
# enabled in the config; otherwise returns the headers untouched.
defp maybe_date_fetch(headers, date) do
  case Pleroma.Config.get([:activitypub, :sign_object_fetches]) do
    disabled when disabled in [nil, false] -> headers
    _enabled -> [{"date", date} | headers]
  end
end
|
|
|
|
|
2024-03-13 14:42:51 -06:00
|
|
|
@doc """
Fetches an arbitrary remote object and performs basic safety and authenticity checks.

Accepts either the id itself (a binary) or a map with an `"id"` key.

Returns `{:ok, data}` with the decoded response body on success. Failures are
reported as `{:error, reason}` (among others `:invalid_uri_scheme`,
`:id_mismatch`, `:local_resource` and `:invalid_id`), or as `{:reject, :mrf}`
when the MRF simple policy reject/accept checks do not pass.
"""
def fetch_and_contain_remote_object_from_id(id)

def fetch_and_contain_remote_object_from_id(%{"id" => id}),
  do: fetch_and_contain_remote_object_from_id(id)

def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
  Logger.debug("Fetching object #{id} via AP")

  with %URI{} = uri <- URI.parse(id),
       # Validate the parsed scheme rather than a string prefix: a prefix check
       # on "http" would also let through schemes such as "httpx://".
       {:valid_uri_scheme, true} <- {:valid_uri_scheme, uri.scheme in ["http", "https"]},
       {:mrf_reject_check, {:ok, nil}} <-
         {:mrf_reject_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_reject(uri)},
       {:mrf_accept_check, {:ok, _}} <-
         {:mrf_accept_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(uri)},
       {:local_fetch, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)},
       {:ok, final_id, body} <- get_object(id),
       {:ok, data} <- safe_json_decode(body),
       # Only allow exact id matches. Per the description of the change that
       # introduced this check: an object that cannot be retrieved via its own
       # id is considered invalid (ActivityPub sections 3.1 and 3.2), which
       # protects against obvious spoofs such as documents served from upload
       # paths. A GoToSocial-like, refetch-free approach was chosen over
       # Mastodon's double fetch to avoid loosening checks on a second request.
       {_, :ok} <- {:strict_id, Containment.contain_id_to_fetch(final_id, data)},
       {_, :ok} <- {:containment, Containment.contain_origin(final_id, data)} do
    # A successful fetch proves the instance can be reached again.
    unless Instances.reachable?(final_id) do
      Instances.set_reachable(final_id)
    end

    {:ok, data}
  else
    {:strict_id, _} = e ->
      log_fetch_error(id, e)
      {:error, :id_mismatch}

    {:mrf_reject_check, _} = e ->
      log_fetch_error(id, e)
      {:reject, :mrf}

    {:mrf_accept_check, _} = e ->
      log_fetch_error(id, e)
      {:reject, :mrf}

    {:valid_uri_scheme, _} = e ->
      log_fetch_error(id, e)
      {:error, :invalid_uri_scheme}

    {:local_fetch, _} = e ->
      log_fetch_error(id, e)
      {:error, :local_resource}

    {:containment, reason} ->
      log_fetch_error(id, reason)
      {:error, reason}

    {:error, e} ->
      {:error, e}

    e ->
      {:error, e}
  end
end

def fetch_and_contain_remote_object_from_id(_id),
  do: {:error, :invalid_id}
|
2020-09-18 05:58:22 -06:00
|
|
|
|
2024-03-13 15:12:17 -06:00
|
|
|
# Returns `{:cross_domain_redirect, boolean}`: `true` when the host that finally
# served the response differs from the host of the originally requested URL.
defp check_crossdomain_redirect(final_host, original_url)

# HOPEFULLY TEMPORARY
# Basically none of our Tesla mocks in tests set the (supposed to
# exist for Tesla proper) url parameter for their responses,
# causing almost every fetch in test to fail otherwise.
if @mix_env == :test do
  defp check_crossdomain_redirect(nil, _), do: {:cross_domain_redirect, false}
end

defp check_crossdomain_redirect(final_host, original_url) do
  %URI{host: original_host} = URI.parse(original_url)

  {:cross_domain_redirect, final_host != original_host}
end
|
|
|
|
|
2024-03-15 13:57:09 -06:00
|
|
|
# Picks the id the response was finally served from. In the test environment a
# missing or empty final URL falls back to the URL that was requested.
if @mix_env == :test do
  defp get_final_id(final_url, initial_url) when final_url in [nil, ""], do: initial_url
end

defp get_final_id(final_url, _initial_url), do: final_url
|
|
|
|
|
2022-12-12 12:06:04 -07:00
|
|
|
@doc """
Performs the HTTP GET for an ActivityPub object.

Returns `{:ok, final_id, body}` when the response has a 2xx status, was not
redirected to another host and carries an ActivityPub content type
(`application/activity+json`, or `application/ld+json` with the ActivityStreams
profile). Otherwise returns `{:error, reason}`, e.g. `:forbidden` for 401/403,
`:not_found` for 404/410 or `{:content_type, detail}` for content type problems.
"""
def get_object(id) do
  date = Pleroma.Signature.signed_date()

  # The first accept value is required by spec, the second is provided as a
  # fallback for buggy implementations.
  accept_headers = [
    {"accept", "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\""},
    {"accept", "application/activity+json"}
  ]

  request_headers =
    accept_headers
    |> maybe_date_fetch(date)
    |> sign_fetch(id, date)

  with {:ok, %{body: body, status: status, headers: response_headers, url: final_url}}
       when status in 200..299 <- HTTP.Backoff.get(id, request_headers),
       final_host <- URI.parse(final_url).host,
       {:cross_domain_redirect, false} <- check_crossdomain_redirect(final_host, id),
       {:has_content_type, {_, content_type}} <-
         {:has_content_type, List.keyfind(response_headers, "content-type", 0)},
       {:parse_content_type, {:ok, "application", subtype, type_params}} <-
         {:parse_content_type, Plug.Conn.Utils.media_type(content_type)} do
    final_id = get_final_id(final_url, id)

    activity_streams? =
      subtype == "activity+json" or
        match?(
          {"ld+json", %{"profile" => "https://www.w3.org/ns/activitystreams"}},
          {subtype, type_params}
        )

    if activity_streams? do
      {:ok, final_id, body}
    else
      {:error, {:content_type, content_type}}
    end
  else
    {:ok, %{status: status}} when status in [401, 403] ->
      {:error, :forbidden}

    {:ok, %{status: status}} when status in [404, 410] ->
      {:error, :not_found}

    {:error, reason} ->
      {:error, reason}

    {:has_content_type, _} ->
      {:error, {:content_type, nil}}

    {:parse_content_type, failure} ->
      {:error, {:content_type, failure}}

    failure ->
      {:error, failure}
  end
end
|
2019-07-13 10:17:57 -06:00
|
|
|
|
2020-09-18 05:58:22 -06:00
|
|
|
# Decodes a JSON body with `Jason.decode/1`; a `nil` body decodes to `{:ok, nil}`.
defp safe_json_decode(json) do
  case json do
    nil -> {:ok, nil}
    _ -> Jason.decode(json)
  end
end
|
2018-12-01 15:53:10 -07:00
|
|
|
end
|