diff options
author | Dave Z <dzaikos@users.noreply.github.com> | 2018-06-23 20:50:43 -0400 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2018-06-23 17:50:43 -0700 |
commit | d847b10e32270c3cf7bafe6e2914e82bda39a924 (patch) | |
tree | 34bfb8caad1b4c4d2b3379140ab5e3edc9c5fda8 /reader/sanitizer | |
parent | 7039df9af1de1aea72e90d4aa9fa6a37d21e1be0 (diff) |
Improve sanitizer to remove script and noscript contents
These tags were removed but the content was rendered as escaped HTML.
See #157
Diffstat (limited to 'reader/sanitizer')
-rw-r--r-- | reader/sanitizer/sanitizer.go | 13 | ||||
-rw-r--r-- | reader/sanitizer/sanitizer_test.go | 20 |
2 files changed, 33 insertions, 0 deletions
diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index 2853911..d7a4626 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -25,6 +25,7 @@ func Sanitize(baseURL, input string) string { tokenizer := html.NewTokenizer(bytes.NewBufferString(input)) var buffer bytes.Buffer var tagStack []string + scriptTagDepth := 0 for { if tokenizer.Next() == html.ErrorToken { @@ -39,6 +40,10 @@ func Sanitize(baseURL, input string) string { token := tokenizer.Token() switch token.Type { case html.TextToken: + if scriptTagDepth > 0 { + continue + } + buffer.WriteString(html.EscapeString(token.Data)) case html.StartTagToken: tagName := token.DataAtom.String() @@ -55,11 +60,15 @@ func Sanitize(baseURL, input string) string { tagStack = append(tagStack, tagName) } + } else if isScriptTag(tagName) { + scriptTagDepth++ } case html.EndTagToken: tagName := token.DataAtom.String() if isValidTag(tagName) && inList(tagName, tagStack) { buffer.WriteString(fmt.Sprintf("</%s>", tagName)) + } else if isScriptTag(tagName) { + scriptTagDepth-- } case html.SelfClosingTagToken: tagName := token.DataAtom.String() @@ -384,3 +393,7 @@ func rewriteIframeURL(link string) string { return link } + +func isScriptTag(tagName string) bool { + return tagName == "script" || tagName == "noscript" +} diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 6eb9b0d..fa7dd6d 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -212,3 +212,23 @@ func TestReplaceIframeURL(t *testing.T) { t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) } } + +func TestReplaceNoScript(t *testing.T) { + input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test"></noscript><p>After paragraph.</p>` + expected := `<p>Before paragraph.</p><p>After paragraph.</p>` + output := Sanitize("http://example.org/", input) + + if expected != 
output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestReplaceScript(t *testing.T) { + input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>` + expected := `<p>Before paragraph.</p><p>After paragraph.</p>` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} |