From ac3c936820033f27e32c9a4490f2f33d6ffd6b05 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Thu, 2 Jan 2020 11:03:03 -0800 Subject: Make sure whitelisted URI schemes are handled properly by the sanitizer --- reader/sanitizer/sanitizer.go | 45 ++++---- reader/sanitizer/sanitizer_test.go | 228 +++++++++++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+), 23 deletions(-) (limited to 'reader') diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index 642fedf..0092bed 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -111,7 +111,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([ continue } - if !hasValidScheme(value) || isBlacklistedResource(value) { + if !hasValidURIScheme(value) || isBlacklistedResource(value) { continue } } @@ -221,17 +221,19 @@ func hasRequiredAttributes(tagName string, attributes []string) bool { return true } -func hasValidScheme(src string) bool { - // See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml +// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml +func hasValidURIScheme(src string) bool { whitelist := []string{ - "apt://", - "bitcoin://", - "callto://", + "apt:", + "bitcoin:", + "callto:", + "dav:", + "davs:", "ed2k://", "facetime://", - "feed://", + "feed:", "ftp://", - "geo://", + "geo:", "gopher://", "git://", "http://", @@ -240,27 +242,24 @@ func hasValidScheme(src string) bool { "irc6://", "ircs://", "itms://", - "jabber://", - "magnet://", - "mailto://", - "maps://", - "news://", - "nfs://", - "nntp://", + "itms-apps://", + "magnet:", + "mailto:", + "news:", + "nntp:", "rtmp://", - "sip://", - "sips://", - "skype://", - "smb://", - "sms://", - "spotify://", + "sip:", + "sips:", + "skype:", + "spotify:", "ssh://", "sftp://", "steam://", "svn://", - "tel://", + "svn+ssh://", + "tel:", "webcal://", - "xmpp://", + "xmpp:", } for _, prefix := range whitelist { diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 649b7f0..1dfa103 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -123,6 +123,234 @@ func TestInvalidURLScheme(t *testing.T) { } } +func TestAPTURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestBitcoinURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestCallToURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestFeedURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestGeoURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestItunesURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestMagnetURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestMailtoURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestNewsURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestRTMPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSIPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSkypeURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSpotifyURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSteamURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestSubversionURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } + + input = `

This link is valid

` + expected = `

This link is valid

` + output = Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestTelURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestWebcalURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestXMPPURIScheme(t *testing.T) { + input := `

This link is valid

` + expected := `

This link is valid

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + func TestBlacklistedLink(t *testing.T) { input := `

This image is not valid

` expected := `

This image is not valid

` -- cgit v1.2.3