diff options
author | Frédéric Guillot <fred@miniflux.net> | 2017-11-25 18:08:59 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2017-11-25 18:08:59 -0800 |
commit | bd663b43a0b2d26936ba8a6172090b845a17550c (patch) | |
tree | a2f6c7d77f2db4c6ee34a0fff3ffdcb5b1b852a5 /reader | |
parent | 1f015d5dfeb3eba7c0c2d1eb2f697dd18c128ecd (diff) |
Improve HTML sanitizer
Diffstat (limited to 'reader')
-rw-r--r-- | reader/json/parser_test.go | 2 | ||||
-rw-r--r-- | reader/rss/parser_test.go | 2 | ||||
-rw-r--r-- | reader/sanitizer/sanitizer.go | 11 | ||||
-rw-r--r-- | reader/sanitizer/sanitizer_test.go | 20 |
4 files changed, 28 insertions, 7 deletions
diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go index ecb11a1..dd680f8 100644 --- a/reader/json/parser_test.go +++ b/reader/json/parser_test.go @@ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) { t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) } - if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` { + if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` { t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content) } diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 9f1a557..f92be50 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) { t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) } - if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` { + if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } } diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index 6af034c..ad286c6 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -7,10 +7,11 @@ package sanitizer import ( "bytes" "fmt" - "github.com/miniflux/miniflux2/reader/url" "io" "strings" + "github.com/miniflux/miniflux2/reader/url" + "golang.org/x/net/html" ) @@ -33,7 +34,7 @@ func Sanitize(baseURL, input string) string { token := tokenizer.Token() switch token.Type { case html.TextToken: - buffer.WriteString(token.Data) + buffer.WriteString(html.EscapeString(token.Data)) case html.StartTagToken: tagName := token.DataAtom.String() @@ -72,8 +73,8 @@ func Sanitize(baseURL, input string) string { } } -func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (attrNames []string, html string) { - var htmlAttrs []string +func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([]string, string) { + var htmlAttrs, attrNames []string var err error for _, attribute := range attributes { @@ -99,7 +100,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (a } attrNames = append(attrNames, attribute.Key) - htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, value)) + htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value))) } extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 73862d3..6456378 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -142,3 +142,23 @@ func TestPixelTracker(t *testing.T) { t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) } } + +func TestXmlEntities(t *testing.T) { + input := `<pre>echo "test" > /etc/hosts</pre>` + expected := `<pre>echo "test" > /etc/hosts</pre>` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + +func TestEspaceAttributes(t *testing.T) { + input := `<td rowspan="<b>test</b>">test</td>` + expected := `<td rowspan="<b>test</b>">test</td>` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} |