about | summary | refs | log | tree | commit | diff | homepage
path: root/reader
diff options
context:
space:
mode:
authorGravatar Dave Z <dzaikos@users.noreply.github.com>2018-06-23 20:50:43 -0400
committerGravatar Frédéric Guillot <fred@miniflux.net>2018-06-23 17:50:43 -0700
commit d847b10e32270c3cf7bafe6e2914e82bda39a924 (patch)
tree 34bfb8caad1b4c4d2b3379140ab5e3edc9c5fda8 /reader
parent 7039df9af1de1aea72e90d4aa9fa6a37d21e1be0 (diff)
Improve sanitizer to remove script and noscript contents
These tags were removed, but their content was still rendered as escaped HTML. See #157
Diffstat (limited to 'reader')
-rw-r--r-- reader/sanitizer/sanitizer.go 13
-rw-r--r-- reader/sanitizer/sanitizer_test.go 20
2 files changed, 33 insertions, 0 deletions
diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go
index 2853911..d7a4626 100644
--- a/reader/sanitizer/sanitizer.go
+++ b/reader/sanitizer/sanitizer.go
@@ -25,6 +25,7 @@ func Sanitize(baseURL, input string) string {
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
var buffer bytes.Buffer
var tagStack []string
+ scriptTagDepth := 0
for {
if tokenizer.Next() == html.ErrorToken {
@@ -39,6 +40,10 @@ func Sanitize(baseURL, input string) string {
token := tokenizer.Token()
switch token.Type {
case html.TextToken:
+ if scriptTagDepth > 0 {
+ continue
+ }
+
buffer.WriteString(html.EscapeString(token.Data))
case html.StartTagToken:
tagName := token.DataAtom.String()
@@ -55,11 +60,15 @@ func Sanitize(baseURL, input string) string {
tagStack = append(tagStack, tagName)
}
+ } else if isScriptTag(tagName) {
+ scriptTagDepth++
}
case html.EndTagToken:
tagName := token.DataAtom.String()
if isValidTag(tagName) && inList(tagName, tagStack) {
buffer.WriteString(fmt.Sprintf("</%s>", tagName))
+ } else if isScriptTag(tagName) {
+ scriptTagDepth--
}
case html.SelfClosingTagToken:
tagName := token.DataAtom.String()
@@ -384,3 +393,7 @@ func rewriteIframeURL(link string) string {
return link
}
+
+func isScriptTag(tagName string) bool {
+ return tagName == "script" || tagName == "noscript"
+}
diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go
index 6eb9b0d..fa7dd6d 100644
--- a/reader/sanitizer/sanitizer_test.go
+++ b/reader/sanitizer/sanitizer_test.go
@@ -212,3 +212,23 @@ func TestReplaceIframeURL(t *testing.T) {
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
}
}
+
+func TestReplaceNoScript(t *testing.T) {
+ input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test"></noscript><p>After paragraph.</p>`
+ expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
+ output := Sanitize("http://example.org/", input)
+
+ if expected != output {
+ t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+ }
+}
+
+func TestReplaceScript(t *testing.T) {
+ input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
+ expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
+ output := Sanitize("http://example.org/", input)
+
+ if expected != output {
+ t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+ }
+}