2018-12-23 13:04:54 -07:00
|
|
|
# Pleroma: A lightweight social networking server
|
2022-02-25 23:11:42 -07:00
|
|
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
2018-12-23 13:04:54 -07:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
2018-11-23 09:40:45 -07:00
|
|
|
defmodule Pleroma.ReverseProxy do
|
2020-07-10 09:10:48 -06:00
|
|
|
# Request headers that ask for a partial/conditional range.
@range_headers ["range", "if-range"]

# Client request headers that are forwarded to the upstream.
@keep_req_headers [
                    "accept",
                    "accept-encoding",
                    "cache-control",
                    "if-modified-since",
                    "if-unmodified-since",
                    "if-none-match"
                  ] ++ @range_headers

# Upstream response headers that carry cache validation information.
@resp_cache_headers ["etag", "date", "last-modified"]

# Upstream response headers that are passed back to the client.
@keep_resp_headers @resp_cache_headers ++
                     [
                       "content-length",
                       "content-type",
                       "content-disposition",
                       "content-encoding",
                       "content-range",
                       "accept-ranges",
                       "vary"
                     ]

# 1209600 seconds = 14 days.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are proxied; anything else is treated as a failure.
@valid_resp_codes [200, 206, 304]

# Defaults for the `max_read_duration`, `max_body_length` and `failed_request_ttl` options.
@max_read_duration :timer.seconds(30)
@max_body_length :infinity
@failed_request_ttl :timer.seconds(60)

# HTTP methods accepted by `call/3`.
@methods ["GET", "HEAD"]

# Cache implementation, resolved when the module is compiled.
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
|
|
|
|
2020-05-11 14:21:53 -06:00
|
|
|
@doc "Returns the default value of the `max_read_duration` option, in milliseconds."
def max_read_duration_default do
  @max_read_duration
end
|
2020-09-05 11:19:09 -06:00
|
|
|
@doc "Returns the `cache-control` value that is set on proxied responses."
def default_cache_control_header do
  @default_cache_control_header
end
|
2020-05-11 14:21:53 -06:00
|
|
|
|
2018-11-23 09:40:45 -07:00
|
|
|
@moduledoc """
|
|
|
|
A reverse proxy.
|
|
|
|
|
|
|
|
Pleroma.ReverseProxy.call(conn, url, options)
|
|
|
|
|
|
|
|
It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
|
|
|
|
|
|
|
|
Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
|
|
|
|
|
|
|
|
Responses are chunked to the client while downloading from the upstream.
|
|
|
|
|
|
|
|
Some request / response headers are preserved:
|
|
|
|
|
|
|
|
* request: `#{inspect(@keep_req_headers)}`
|
|
|
|
* response: `#{inspect(@keep_resp_headers)}`
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
|
|
|
* `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
errors. Any error during body processing will not be redirected as the response is chunked. This may expose
the remote URL, client IPs, ….
|
|
|
|
|
|
|
|
* `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
|
|
|
|
specified length. It is validated with the `content-length` header and also verified when proxying.
|
|
|
|
|
|
|
|
* `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
|
|
|
|
read from the remote upstream.
|
|
|
|
|
2019-10-01 14:00:27 -06:00
|
|
|
* `failed_request_ttl` (default `#{inspect(@failed_request_ttl)}` ms): the time the failed request is cached and cannot be retried.
|
|
|
|
|
2018-11-23 09:40:45 -07:00
|
|
|
* `inline_content_types`:
|
|
|
|
* `true` will not alter `content-disposition` (up to the upstream),
|
|
|
|
* `false` will add `content-disposition: attachment` to any request,
|
|
|
|
* a list of whitelisted content types
|
|
|
|
|
|
|
|
* `req_headers`, `resp_headers` additional headers.
|
|
|
|
|
2020-03-13 00:37:57 -06:00
|
|
|
* `http`: options for [hackney](https://github.com/benoitc/hackney) or [gun](https://github.com/ninenines/gun).
|
2018-11-23 09:40:45 -07:00
|
|
|
|
|
|
|
"""
|
2020-02-11 00:12:57 -07:00
|
|
|
# Base options for the HTTP client; the `:http` option of `call/3` is merged over these.
@default_options [pool: :media]

# Content types served without a forced `content-disposition: attachment`
# when the `inline_content_types` option is not given.
@inline_content_types ~w(
  image/gif
  image/jpeg
  image/jpg
  image/png
  image/svg+xml
  audio/mpeg
  audio/mp3
  video/webm
  video/mp4
  video/quicktime
)
|
|
|
|
|
|
|
|
require Logger
|
|
|
|
import Plug.Conn
|
|
|
|
|
|
|
|
@typedoc """
A single option accepted by `call/3`.

`:http` holds HTTP client options and is merged over the module defaults, so it is a
keyword list (the previous `[]` typed it as the empty list only). `:attachment_name` is
the fallback file name used when a `content-disposition: attachment` header is generated;
`call/3` fills it in from the URL when it is not given.
"""
@type option() ::
        {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:failed_request_ttl, :timer.time() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
        | {:attachment_name, String.t()}
|
|
|
|
|
|
|
|
@doc """
Proxies `url` to the client behind `conn` and returns the halted connection.

Only the methods in `@methods` are proxied; any other request is answered with `400`.
URLs found in `:failed_proxy_url_cache` are rejected without contacting the upstream.
Upstream failures are logged, recorded in that cache and answered with an error response
or, when `redirect_on_failure` is set, with a redirect to `url`.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(_conn, _url, _opts \\ [])

def call(conn = %{method: method}, url, opts) when method in @methods do
  client_opts = Keyword.merge(@default_options, Keyword.get(opts, :http, []))

  req_headers = build_req_headers(conn.req_headers, opts)

  # Remember the file name from the URL as a fallback for content-disposition.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, nil} <- @cachex.get(:failed_proxy_url_cache, url),
       {:ok, code, headers, client} <- request(method, url, req_headers, client_opts),
       :ok <- checked_header_length(client, headers, max_body_length) do
    response(conn, client, url, code, headers, opts)
  else
    # The URL is in the failed-request cache: do not retry it.
    {:ok, true} ->
      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()

    # The client returned no body reference, so only the headers are relayed.
    {:ok, code, headers} ->
      head_response(conn, url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")
      track_failed_url(url, code, opts)

      conn
      |> error_or_redirect(url, code, failure_message(code), opts)
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")
      track_failed_url(url, error, opts)

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end

# Validates the announced content length and closes the already opened upstream
# client when the check fails, as the body will never be read in that case.
defp checked_header_length(client, headers, limit) do
  with {:error, _reason} = error <- header_length_constraint(headers, limit) do
    client().close(client)
    error
  end
end

# Builds the error body for an upstream status code. Falls back to the generic
# message when `Plug.Conn.Status.reason_phrase/1` rejects the code with an ArgumentError.
defp failure_message(code) do
  "Request failed: " <> Plug.Conn.Status.reason_phrase(code)
rescue
  ArgumentError -> "Request failed"
end
|
|
|
|
|
2020-02-11 00:12:57 -07:00
|
|
|
# Performs the upstream request through `client()` and normalizes the result:
# responses with a status in `@valid_resp_codes` are returned with lower-cased header
# names, any other status becomes `{:error, {:invalid_http_response, status}}` and
# client errors are passed through unchanged.
defp request(method, url, headers, opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")

  verb = String.to_existing_atom(String.downcase(method))
  reply = client().request(verb, url, headers, "", opts)

  case reply do
    {:ok, status, resp_headers, ref} when status in @valid_resp_codes ->
      {:ok, status, downcase_headers(resp_headers), ref}

    {:ok, status, resp_headers} when status in @valid_resp_codes ->
      {:ok, status, downcase_headers(resp_headers)}

    {:ok, status, _resp_headers, _ref} ->
      {:error, {:invalid_http_response, status}}

    {:ok, status, _resp_headers} ->
      {:error, {:invalid_http_response, status}}

    {:error, reason} ->
      {:error, reason}
  end
end
|
|
|
|
|
|
|
|
# Sends the filtered upstream headers, starts a chunked response with `status` and
# relays the upstream body. On any streaming error the upstream client is closed;
# errors other than `:closed` are logged as well. The connection is always halted.
defp response(conn, client, url, status, headers, opts) do
  Logger.debug("#{__MODULE__} #{status} #{url} #{inspect(headers)}")

  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, conn} ->
      halt(conn)

    {:error, reason, conn} ->
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      client().close(client)
      halt(conn)
  end
end
|
|
|
|
|
|
|
|
# Starts relaying the upstream body with nothing sent and no time spent yet.
defp chunk_reply(conn, client, opts), do: chunk_reply(conn, client, opts, 0, 0)

# Relays one upstream chunk per iteration until the client reports `:done`.
# Before each read the accumulated read time is checked against `max_read_duration`;
# after each read the accumulated body size is checked against `max_body_length`.
# Returns `{:ok, conn}` when the body is complete or `{:error, reason, conn}`.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  read_limit = Keyword.get(opts, :max_read_duration, @max_read_duration)
  size_limit = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, timer} <- check_read_duration(duration, read_limit),
       {:ok, data, client} <- client().stream_body(client),
       {:ok, elapsed} <- increase_read_duration(timer),
       total_sent = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(total_sent, size_limit),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, total_sent, elapsed)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
|
|
|
|
|
2020-07-10 09:10:48 -06:00
|
|
|
# Answers with the filtered upstream headers, the upstream status and an empty body.
defp head_response(conn, url, code, headers, opts) do
  Logger.debug("#{__MODULE__} #{code} #{url} #{inspect(headers)}")

  resp_headers = build_resp_headers(headers, opts)

  send_resp(put_resp_headers(conn, resp_headers), code, "")
end
|
|
|
|
|
|
|
|
# Redirects the client to `url` when the `redirect_on_failure` option is truthy,
# otherwise replies with `code` and `body`. The connection is halted either way.
defp error_or_redirect(conn, url, code, body, opts) do
  replied =
    case Keyword.get(opts, :redirect_on_failure, false) do
      disabled when disabled in [nil, false] -> send_resp(conn, code, body)
      _enabled -> Phoenix.Controller.redirect(conn, external: url)
    end

  halt(replied)
end
|
|
|
|
|
|
|
|
# Lower-cases every header name; values are left untouched.
defp downcase_headers(headers) do
  Enum.map(headers, fn {name, value} -> {String.downcase(name), value} end)
end
|
|
|
|
|
2018-11-23 10:57:27 -07:00
|
|
|
# Returns the media type of the `content-type` header without its parameters,
# or "application/octet-stream" when the header is absent.
#
# The media type is trimmed and lower-cased so that values such as
# "Image/PNG" or "image/png ; charset=utf-8" still compare equal to the
# lower-case entries of the inline content type whitelist.
defp get_content_type(headers) do
  {_, content_type} =
    List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

  content_type
  |> String.split(";", parts: 2)
  |> hd()
  |> String.trim()
  |> String.downcase()
end
|
|
|
|
|
2018-11-23 09:40:45 -07:00
|
|
|
# Sets every `{name, value}` pair of `headers` as a response header on `conn`.
defp put_resp_headers(conn, headers) do
  List.foldl(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
|
|
|
|
|
|
|
|
# Builds the headers sent to the upstream: the client headers listed in
# `@keep_req_headers` (names lower-cased), adjusted for range requests, plus the
# user agent and the headers given in the `req_headers` option.
#
# Headers are `{String.t(), String.t()}` pairs, so they cannot be combined with
# `Keyword.merge/2`, which only accepts atom keys. The extra headers replace
# same-named headers and are appended, which is the ordering `Keyword.merge/2` uses.
defp build_req_headers(headers, opts) do
  extra_headers = Keyword.get(opts, :req_headers, [])

  headers
  |> downcase_headers()
  |> Enum.filter(fn {k, _} -> k in @keep_req_headers end)
  |> build_req_range_or_encoding_header(opts)
  |> build_req_user_agent_header(opts)
  |> Enum.reject(fn {k, _} -> List.keymember?(extra_headers, k, 0) end)
  |> Kernel.++(extra_headers)
end
|
|
|
|
|
|
|
|
# Drops `accept-encoding` from the request whenever one of the @range_headers is
# present, so the upstream is not asked for an encoded body (see #1823).
defp build_req_range_or_encoding_header(headers, _opts) do
  range_requested? = Enum.any?(headers, fn {name, _value} -> name in @range_headers end)

  if range_requested? do
    # Deleting is a no-op when the header is absent.
    List.keydelete(headers, "accept-encoding", 0)
  else
    headers
  end
end
|
|
|
|
|
2021-02-22 13:46:59 -07:00
|
|
|
# Sets (or replaces) the `user-agent` header with the application's own user agent.
defp build_req_user_agent_header(headers, _opts) do
  user_agent = {"user-agent", Pleroma.Application.user_agent()}

  List.keystore(headers, "user-agent", 0, user_agent)
end
|
|
|
|
|
|
|
|
# Builds the headers sent to the client: the upstream headers listed in
# `@keep_resp_headers`, the cache-control and content-disposition headers, plus the
# headers given in the `resp_headers` option.
#
# Headers are `{String.t(), String.t()}` pairs, so they cannot be combined with
# `Keyword.merge/2`, which only accepts atom keys. The extra headers replace
# same-named headers and are appended, which is the ordering `Keyword.merge/2` uses.
defp build_resp_headers(headers, opts) do
  extra_headers = Keyword.get(opts, :resp_headers, [])

  headers
  |> Enum.filter(fn {k, _} -> k in @keep_resp_headers end)
  |> build_resp_cache_headers(opts)
  |> build_resp_content_disposition_header(opts)
  |> Enum.reject(fn {k, _} -> List.keymember?(extra_headers, k, 0) end)
  |> Kernel.++(extra_headers)
end
|
|
|
|
|
2018-12-09 02:12:48 -07:00
|
|
|
# Always sets our own `cache-control` header, whether or not the upstream sent
# caching headers: as the original comment here notes, Plug defaults to
# "max-age=0, private, must-revalidate".
defp build_resp_cache_headers(headers, _opts) do
  List.keystore(
    headers,
    "cache-control",
    0,
    {"cache-control", @default_cache_control_header}
  )
end
|
|
|
|
|
|
|
|
# Forces `content-disposition: attachment` unless the response content type may be
# shown inline. The `inline_content_types` option decides:
#   * a list   - attachment unless the content type is a member of the list
#   * `false`  - always attachment
#   * anything else (e.g. `true`) - headers are returned untouched
#
# The file name is taken from the upstream `content-disposition` header when it has
# a quoted `filename="..."`, otherwise from the `attachment_name` option, otherwise
# it is "attachment".
defp build_resp_content_disposition_header(headers, opts) do
  opt = Keyword.get(opts, :inline_content_types, @inline_content_types)

  content_type = get_content_type(headers)

  attachment? =
    cond do
      is_list(opt) && !Enum.member?(opt, content_type) -> true
      opt == false -> true
      true -> false
    end

  if attachment? do
    name =
      content_disposition_filename(headers) ||
        Keyword.get(opts, :attachment_name, "attachment")

    disposition = "attachment; filename=\"#{name}\""

    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end

# Extracts the quoted file name from the `content-disposition` header.
# Returns `nil` when the header is absent or has no quoted `filename`.
defp content_disposition_filename(headers) do
  with {"content-disposition", value} <- List.keyfind(headers, "content-disposition", 0),
       [name | _] <-
         Regex.run(
           ~r/filename="((?:[^"\\]|\\.)*)"/u,
           value || "",
           capture: :all_but_first
         ) do
    name
  else
    _ -> nil
  end
end
|
|
|
|
|
2018-11-29 13:11:45 -07:00
|
|
|
# Checks the announced `content-length` against a positive integer `limit`.
# Returns `{:error, :body_too_large}` only when the header parses to an integer
# greater than `limit`; a missing or unparsable header passes.
defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_name, raw_length} ->
      case Integer.parse(raw_length) do
        {length, _rest} when length > limit -> {:error, :body_too_large}
        _ -> :ok
      end

    _ ->
      :ok
  end
end

# No usable limit (e.g. `:infinity`): nothing to check.
defp header_length_constraint(_, _), do: :ok
|
2018-11-23 09:40:45 -07:00
|
|
|
|
|
|
|
# Checks the number of body bytes received so far against a positive integer `limit`.
# A body of exactly `limit` bytes is accepted, matching `header_length_constraint/2`,
# which accepts a `content-length` equal to the limit.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

# Within the limit, or no usable limit (e.g. `:infinity`).
defp body_size_constraint(_, _), do: :ok
|
|
|
|
|
2020-05-11 14:21:53 -06:00
|
|
|
# Read-duration bookkeeping for the chunk loop.
#
# `check_read_duration/2` is called before each upstream read with the time spent
# reading so far. It returns `{:error, :read_duration_exceeded}` once that time is
# over `max`, otherwise `{:ok, {duration, started}}`, where `started` is the
# monotonic timestamp (ms) at which the next read begins.
# `increase_read_duration/1` is called after the read and returns `{:ok, duration}`
# with the time of that read added.
#
# When there is no integer limit (e.g. `:infinity`), both return
# `{:ok, :no_duration_limit}`, so the result has the same `{:ok, value}` shape as the
# limited case and can be fed back into either function.
defp check_read_duration(nil = _duration, max), do: check_read_duration(@max_read_duration, max)

defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    # Monotonic time is not affected by wall-clock adjustments.
    {:ok, {duration, :erlang.monotonic_time(:millisecond)}}
  end
end

defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  duration = :erlang.monotonic_time(:millisecond) - started
  {:ok, previous_duration + duration}
end

defp increase_read_duration(_) do
  {:ok, :no_duration_limit}
end
|
2019-06-14 09:45:05 -06:00
|
|
|
|
2021-06-02 15:34:32 -06:00
|
|
|
# Module through which all upstream HTTP calls (request, stream_body, close) are made.
defp client do
  Pleroma.ReverseProxy.Client.Wrapper
end
|
2019-10-01 14:00:27 -06:00
|
|
|
|
2019-10-11 13:52:38 -06:00
|
|
|
# Records `url` in `:failed_proxy_url_cache` so that `call/3` rejects it without
# contacting the upstream again. The entry gets the `failed_request_ttl` option as
# its TTL, except for `:body_too_large`, 400 and 204, which are stored with
# `ttl: nil`.
# NOTE(review): presumably a nil TTL means the entry does not expire — confirm
# against the configured cache provider.
defp track_failed_url(url, error, opts) do
  ttl =
    if error in [:body_too_large, 400, 204] do
      nil
    else
      Keyword.get(opts, :failed_request_ttl, @failed_request_ttl)
    end

  @cachex.put(:failed_proxy_url_cache, url, true, ttl: ttl)
end
|
2018-11-23 09:40:45 -07:00
|
|
|
end
|