From ac3c936820033f27e32c9a4490f2f33d6ffd6b05 Mon Sep 17 00:00:00 2001
From: Frédéric Guillot
Date: Thu, 2 Jan 2020 11:03:03 -0800
Subject: Make sure whitelisted URI schemes are handled properly by the sanitizer

---
 reader/sanitizer/sanitizer.go      |  45 ++++----
 reader/sanitizer/sanitizer_test.go | 228 +++++++++++++++++++++++++++++++++++++
 url/url_test.go                    |   2 +
 3 files changed, 252 insertions(+), 23 deletions(-)

diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go
index 642fedf..0092bed 100644
--- a/reader/sanitizer/sanitizer.go
+++ b/reader/sanitizer/sanitizer.go
@@ -111,7 +111,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
 					continue
 				}
 
-				if !hasValidScheme(value) || isBlacklistedResource(value) {
+				if !hasValidURIScheme(value) || isBlacklistedResource(value) {
 					continue
 				}
 			}
@@ -221,17 +221,19 @@ func hasRequiredAttributes(tagName string, attributes []string) bool {
 	return true
 }
 
-func hasValidScheme(src string) bool {
-	// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+func hasValidURIScheme(src string) bool {
 	whitelist := []string{
-		"apt://",
-		"bitcoin://",
-		"callto://",
+		"apt:",
+		"bitcoin:",
+		"callto:",
+		"dav:",
+		"davs:",
 		"ed2k://",
 		"facetime://",
-		"feed://",
+		"feed:",
 		"ftp://",
-		"geo://",
+		"geo:",
 		"gopher://",
 		"git://",
 		"http://",
@@ -240,27 +242,24 @@ func hasValidScheme(src string) bool {
 		"irc6://",
 		"ircs://",
 		"itms://",
-		"jabber://",
-		"magnet://",
-		"mailto://",
-		"maps://",
-		"news://",
-		"nfs://",
-		"nntp://",
+		"itms-apps://",
+		"magnet:",
+		"mailto:",
+		"news:",
+		"nntp:",
 		"rtmp://",
-		"sip://",
-		"sips://",
-		"skype://",
-		"smb://",
-		"sms://",
-		"spotify://",
+		"sip:",
+		"sips:",
+		"skype:",
+		"spotify:",
 		"ssh://",
 		"sftp://",
 		"steam://",
 		"svn://",
-		"tel://",
+		"svn+ssh://",
+		"tel:",
 		"webcal://",
-		"xmpp://",
+		"xmpp:",
 	}
 
 	for _, prefix := range whitelist {
diff --git 
a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 649b7f0..1dfa103 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -123,6 +123,234 @@ func TestInvalidURLScheme(t *testing.T) { } } +func TestAPTURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestBitcoinURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestCallToURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestFeedURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestGeoURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestItunesURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestMagnetURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestMailtoURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestNewsURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestRTMPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSIPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSkypeURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSpotifyURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSteamURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSubversionURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestTelURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestWebcalURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestXMPPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + func TestBlacklistedLink(t *testing.T) { input := `

This image is not valid

` expected := `

This image is not valid

` diff --git a/url/url_test.go b/url/url_test.go index 54868a9..56b6e13 100644 --- a/url/url_test.go +++ b/url/url_test.go @@ -13,6 +13,8 @@ func TestAbsoluteURL(t *testing.T) { []string{"https://example.org/path/file.ext", "https://example.org/folder", "path/file.ext"}, []string{"https://example.org/path/file.ext", "https://example.org/folder/", "https://example.org/path/file.ext"}, []string{"https://static.example.org/path/file.ext", "https://www.example.org/", "//static.example.org/path/file.ext"}, + []string{"magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a", "https://www.example.org/", "magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"}, + []string{"magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7", "https://www.example.org/", "magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7"}, } for _, scenario := range scenarios { -- cgit v1.2.3