2018-12-23 13:11:29 -07:00
|
|
|
# Pleroma: A lightweight social networking server
|
2022-02-25 23:11:42 -07:00
|
|
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
2018-12-23 13:11:29 -07:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
2018-09-21 21:44:19 -06:00
|
|
|
defmodule Pleroma.HTMLTest do
|
|
|
|
alias Pleroma.HTML
|
2019-06-14 05:34:42 -06:00
|
|
|
alias Pleroma.Object
|
|
|
|
alias Pleroma.Web.CommonAPI
|
2020-12-21 04:21:40 -07:00
|
|
|
use Pleroma.DataCase, async: true
|
2018-09-21 21:44:19 -06:00
|
|
|
|
2019-06-14 05:34:42 -06:00
|
|
|
import Pleroma.Factory
|
|
|
|
|
2018-09-21 21:44:19 -06:00
|
|
|
# Mixed-markup fixture: bold text, a paragraph, line breaks, two links (one
# with an extra `rel` value), a plain image, an emoji-classed image and a
# <script> element.
@html_sample ~S"""
<b>this is in bold</b>
<p>this is a paragraph</p>
this is a linebreak<br />
this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a>
this is a link with not allowed "rel" attribute: <a href="http://example.com/" rel="tag noallowed">example.com</a>
this is an image: <img src="http://example.com/image.jpg"><br />
this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"><br />
<script>alert('hacked')</script>
"""

# An image carrying an inline `onerror` event handler.
@html_onerror_sample ~S"""
<img src="http://example.com/image.jpg" onerror="alert('hacked')">
"""

# An image whose class is "still-image" rather than "emoji".
@html_stillimage_sample ~S"""
<img class="still-image" src="http://example.com/image.jpg">
"""

# A span with a class that is not a microformats class.
@html_span_class_sample ~S"""
<span class="animate-spin">hi</span>
"""

# A microformats h-card mention.
@html_span_microformats_sample ~S"""
<span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
"""

# The same h-card mention, with an extra class added to the anchor.
@html_span_invalid_microformats_sample ~S"""
<span class="h-card"><a class="u-url mention animate-spin">@<span>foo</span></a></span>
"""
|
|
|
|
|
2018-09-21 21:44:19 -06:00
|
|
|
describe "StripTags scrubber" do
|
|
|
|
test "works as expected" do
  # Every tag is removed and only text content remains, including the body
  # of the <script> element.
  plain_text = """
  this is in bold
  this is a paragraph
  this is a linebreak
  this is a link with allowed "rel" attribute: example.com
  this is a link with not allowed "rel" attribute: example.com
  this is an image:
  this is an inline emoji:
  alert('hacked')
  """

  stripped = HTML.strip_tags(@html_sample)

  assert plain_text == stripped
end
|
|
|
|
|
|
|
|
test "does not allow attribute-based XSS" do
  # The sample is a single <img> with an `onerror` handler; only the
  # trailing newline is expected to remain.
  stripped = HTML.strip_tags(@html_onerror_sample)

  assert "\n" == stripped
end
|
|
|
|
end
|
|
|
|
|
|
|
|
describe "TwitterText scrubber" do
|
|
|
|
test "normalizes HTML as expected" do
  # <b> and <script> tags are dropped (their text is kept), the
  # `rel="tag noallowed"` attribute is removed, and void elements come back
  # self-closed.
  normalized = """
  this is in bold
  <p>this is a paragraph</p>
  this is a linebreak<br/>
  this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a>
  this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a>
  this is an image: <img src="http://example.com/image.jpg"/><br/>
  this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"/><br/>
  alert('hacked')
  """

  scrubbed = HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert normalized == scrubbed
end
|
|
|
|
|
|
|
|
test "does not allow attribute-based XSS" do
  # The `onerror` handler is dropped; the <img> and its `src` are kept.
  safe_markup = """
  <img src="http://example.com/image.jpg"/>
  """

  scrubbed = HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert safe_markup == scrubbed
end
|
2019-04-23 16:55:21 -06:00
|
|
|
|
|
|
|
test "does not allow spans with invalid classes" do
  # The "animate-spin" class is removed while the span itself survives.
  bare_span = """
  <span>hi</span>
  """

  scrubbed = HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert bare_span == scrubbed
end
|
|
|
|
|
2022-11-26 19:40:21 -07:00
|
|
|
test "does not allow images with invalid classes" do
  # The "still-image" class is removed; only `src` remains on the <img>.
  bare_image = """
  <img src="http://example.com/image.jpg"/>
  """

  scrubbed = HTML.filter_tags(@html_stillimage_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert bare_image == scrubbed
end
|
|
|
|
|
2019-04-23 16:55:21 -06:00
|
|
|
test "does allow microformats" do
  # The h-card mention markup is expected to pass through unchanged.
  untouched = """
  <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
  """

  scrubbed =
    HTML.filter_tags(@html_span_microformats_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert untouched == scrubbed
end
|
|
|
|
|
|
|
|
test "filters invalid microformats markup" do
  # The anchor's whole `class` attribute is removed once it contains the
  # extra "animate-spin" value; the outer h-card span is kept.
  filtered = """
  <span class="h-card"><a>@<span>foo</span></a></span>
  """

  scrubbed =
    HTML.filter_tags(@html_span_invalid_microformats_sample, Pleroma.HTML.Scrubber.TwitterText)

  assert filtered == scrubbed
end
|
2018-09-21 21:44:19 -06:00
|
|
|
end
|
|
|
|
|
|
|
|
describe "default scrubber" do
|
|
|
|
test "normalizes HTML as expected" do
  # <b> is kept here, the <script> tag is dropped (its text is kept), the
  # `rel="tag noallowed"` attribute is removed, and void elements come back
  # self-closed.
  normalized = """
  <b>this is in bold</b>
  <p>this is a paragraph</p>
  this is a linebreak<br/>
  this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a>
  this is a link with not allowed "rel" attribute: <a href="http://example.com/">example.com</a>
  this is an image: <img src="http://example.com/image.jpg"/><br/>
  this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"/><br/>
  alert('hacked')
  """

  scrubbed = HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.Default)

  assert normalized == scrubbed
end
|
|
|
|
|
|
|
|
test "does not allow attribute-based XSS" do
  # The `onerror` handler is dropped; the <img> and its `src` are kept.
  safe_markup = """
  <img src="http://example.com/image.jpg"/>
  """

  scrubbed = HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.Default)

  assert safe_markup == scrubbed
end
|
2019-04-23 16:55:21 -06:00
|
|
|
|
|
|
|
test "does not allow spans with invalid classes" do
  # The "animate-spin" class is removed while the span itself survives.
  bare_span = """
  <span>hi</span>
  """

  scrubbed = HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.Default)

  assert bare_span == scrubbed
end
|
|
|
|
|
2022-11-26 19:40:21 -07:00
|
|
|
test "does not allow images with invalid classes" do
  # The sample <img> carries class="still-image"; only the `src` attribute
  # is expected to survive scrubbing.
  expected = """
  <img src="http://example.com/image.jpg"/>
  """

  # This test belongs to the "default scrubber" group, so it must run the
  # Default scrubber. It previously called the TwitterText scrubber, which
  # left the Default scrubber's <img> class handling untested.
  assert expected ==
           HTML.filter_tags(@html_stillimage_sample, Pleroma.HTML.Scrubber.Default)
end
|
|
|
|
|
2019-04-23 16:55:21 -06:00
|
|
|
test "does allow microformats" do
  # The h-card mention markup is expected to pass through unchanged.
  untouched = """
  <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
  """

  scrubbed = HTML.filter_tags(@html_span_microformats_sample, Pleroma.HTML.Scrubber.Default)

  assert untouched == scrubbed
end
|
|
|
|
|
|
|
|
test "filters invalid microformats markup" do
  # The anchor's whole `class` attribute is removed once it contains the
  # extra "animate-spin" value; the outer h-card span is kept.
  filtered = """
  <span class="h-card"><a>@<span>foo</span></a></span>
  """

  scrubbed =
    HTML.filter_tags(@html_span_invalid_microformats_sample, Pleroma.HTML.Scrubber.Default)

  assert filtered == scrubbed
end
|
2018-09-21 21:44:19 -06:00
|
|
|
end
|
2019-06-14 05:34:42 -06:00
|
|
|
|
2020-09-05 03:37:27 -06:00
|
|
|
describe "extract_first_external_url_from_object" do
|
2019-06-14 05:34:42 -06:00
|
|
|
test "extracts the url" do
  author = insert(:user)
  text = "I think I just found the best github repo https://github.com/komeiji-satori/Dress"

  {:ok, activity} = CommonAPI.post(author, %{status: text})

  # Work on the locally stored object (fetch: false).
  {:ok, url} =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  assert url == "https://github.com/komeiji-satori/Dress"
end
|
|
|
|
|
|
|
|
test "skips mentions" do
  author = insert(:user)
  mentioned = insert(:user)

  # The status starts with a mention, followed by the external link.
  text =
    "@#{mentioned.nickname} install misskey! https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"

  {:ok, activity} = CommonAPI.post(author, %{status: text})

  {:ok, url} =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"

  # The mentioned user's ActivityPub id must not be picked as the URL.
  refute url == mentioned.ap_id
end
|
|
|
|
|
|
|
|
test "skips hashtags" do
  author = insert(:user)

  # A hashtag precedes the external link in the status text.
  text = "#cofe https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"

  {:ok, activity} = CommonAPI.post(author, %{status: text})

  {:ok, url} =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
|
2019-06-18 15:31:30 -06:00
|
|
|
|
|
|
|
test "skips microformats hashtags" do
  author = insert(:user)

  # The hashtag is supplied as ready-made HTML (an anchor with rel="tag"),
  # so the post is submitted with the text/html content type.
  params = %{
    status:
      "<a href=\"https://pleroma.gov/tags/cofe\" rel=\"tag\">#cofe</a> https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140",
    content_type: "text/html"
  }

  {:ok, activity} = CommonAPI.post(author, params)

  {:ok, url} =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
|
2019-11-29 01:49:35 -07:00
|
|
|
|
|
|
|
test "does not crash when there is an HTML entity in a link" do
  author = insert(:user)

  # The status is a double-quoted URL whose query string contains `&`.
  {:ok, activity} = CommonAPI.post(author, %{status: "\"http://cofe.com/?boomer=ok&foo=bar\""})

  result =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  # No URL is expected, but the call must return normally.
  assert {:ok, nil} = result
end
|
2020-06-29 06:25:57 -06:00
|
|
|
|
|
|
|
test "skips attachment links" do
  author = insert(:user)

  # The only link in the status is an anchor with class="attachment".
  text =
    "<a href=\"https://pleroma.gov/media/d24caa3a498e21e0298377a9ca0149a4f4f8b767178aacf837542282e2d94fb1.png?name=image.png\" class=\"attachment\">image.png</a>"

  {:ok, activity} = CommonAPI.post(author, %{status: text})

  result =
    activity
    |> Object.normalize(fetch: false)
    |> HTML.extract_first_external_url_from_object()

  assert {:ok, nil} = result
end
|
2019-06-14 05:34:42 -06:00
|
|
|
end
|
2018-09-21 21:44:19 -06:00
|
|
|
end
|