# akkoma/test/pleroma/web/rich_media/parser_test.exs

# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

defmodule Pleroma.Web.RichMedia.ParserTest do
  use Pleroma.DataCase

  alias Pleroma.Web.RichMedia.Parser

  # Every test runs against canned HTTP responses registered with Tesla.Mock,
  # keyed on request method and URL.
  setup do
    Tesla.Mock.mock_global(fn
      # --- GET: page bodies served from fixtures ---
      %{method: :get, url: "http://example.com/ogp"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}

      %{method: :get, url: "http://example.com/non-ogp"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}

      %{method: :get, url: "http://example.com/ogp-missing-title"} ->
        %Tesla.Env{
          status: 200,
          body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
        }

      %{method: :get, url: "http://example.com/twitter-card"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}

      %{method: :get, url: "http://example.com/oembed"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}

      %{method: :get, url: "http://example.com/oembed.json"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}

      # A successful response whose body carries no metadata at all.
      %{method: :get, url: "http://example.com/empty"} ->
        %Tesla.Env{status: 200, body: "hello"}

      %{method: :get, url: "http://example.com/malformed"} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}

      # Transport-level failure instead of a response.
      %{method: :get, url: "http://example.com/error"} ->
        {:error, :overload}

      # --- HEAD: header-only responses used by the size / content-type tests ---
      %{method: :head, url: "http://example.com/huge-page"} ->
        %Tesla.Env{
          status: 200,
          headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
        }

      %{method: :head, url: "http://example.com/pdf-file"} ->
        %Tesla.Env{
          status: 200,
          headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
        }

      # Catch-all for any other HEAD request; must stay last so the specific
      # HEAD clauses above are matched first.
      %{method: :head} ->
        %Tesla.Env{status: 404, body: "", headers: []}
    end)

    :ok
  end

  test "returns error when no metadata present" do
    assert {:error, _} = Parser.parse("http://example.com/empty")
  end

  test "doesn't just add a title" do
    assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp")
  end

  test "parses ogp" do
    assert Parser.parse("http://example.com/ogp") ==
             {:ok,
              %{
                "image" => "http://ia.media-imdb.com/images/rock.jpg",
                "title" => "The Rock",
                "description" =>
                  "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                "type" => "video.movie",
                "url" => "http://example.com/ogp"
              }}
  end

  test "falls back to <title> when ogp:title is missing" do
    assert Parser.parse("http://example.com/ogp-missing-title") ==
             {:ok,
              %{
                "image" => "http://ia.media-imdb.com/images/rock.jpg",
                "title" => "The Rock (1996)",
                "description" =>
                  "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                "type" => "video.movie",
                "url" => "http://example.com/ogp-missing-title"
              }}
  end

  test "parses twitter card" do
    assert Parser.parse("http://example.com/twitter-card") ==
             {:ok,
              %{
                "card" => "summary",
                "site" => "@flickr",
                "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
                "title" => "Small Island Developing States Photo Submission",
                "description" => "View the album on Flickr.",
                "url" => "http://example.com/twitter-card"
              }}
  end

  test "parses OEmbed and filters HTML tags" do
    assert Parser.parse("http://example.com/oembed") ==
             {:ok,
              %{
                "author_name" => "\u202E\u202D\u202Cbees\u202C",
                "author_url" => "https://www.flickr.com/photos/bees/",
                "cache_age" => 3600,
                "flickr_type" => "photo",
                "height" => "768",
                "html" =>
                  "<a href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"/></a>",
                "license" => "All Rights Reserved",
                "license_id" => 0,
                "provider_name" => "Flickr",
                "provider_url" => "https://www.flickr.com/",
                "thumbnail_height" => 150,
                "thumbnail_url" =>
                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
                "thumbnail_width" => 150,
                "title" => "Bacon Lollys",
                "type" => "photo",
                "url" => "http://example.com/oembed",
                "version" => "1.0",
                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
                "width" => "1024"
              }}
  end

  test "rejects invalid OGP data" do
    assert {:error, _} = Parser.parse("http://example.com/malformed")
  end

  test "returns error if getting page was not successful" do
    assert {:error, :overload} = Parser.parse("http://example.com/error")
  end

  test "does a HEAD request to check if the body is too large" do
    assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
  end

  test "does a HEAD request to check if the body is html" do
    assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
  end

  test "refuses to crawl incomplete URLs" do
    # No scheme, so this is not a complete URL.
    url = "example.com/ogp"
    assert :error == Parser.parse(url)
  end

  test "refuses to crawl malformed URLs" do
    url = "example.com[]/ogp"
    assert :error == Parser.parse(url)
  end

  test "refuses to crawl URLs of private network from posts" do
    # Loopback, private-range IPv4 addresses and a `.local` hostname.
    [
      "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO",
      "https://10.111.10.1/notice/9kCP7V",
      "https://172.16.32.40/notice/9kCP7V",
      "https://192.168.10.40/notice/9kCP7V",
      "https://pleroma.local/notice/9kCP7V"
    ]
    |> Enum.each(fn url ->
      assert :error == Parser.parse(url)
    end)
  end
end