about | summary | refs | log | tree | commit | diff | homepage
path: root/reader/rss
diff options
context:
space:
mode:
author Frédéric Guillot <fred@miniflux.net> 2019-09-18 22:27:25 -0700
committer Frédéric Guillot <fred@miniflux.net> 2019-09-18 22:45:56 -0700
commit 36d773223481dd42d31499b3ea73e6999ff9f58e (patch)
tree a122e207303b2fa33b334dbe19e57a508a955a25 /reader/rss
parent ca48f7612ada5d64d1097c40321a919eb28eb2bf (diff)
Disable strict XML parsing
This change should improve parsing of broken XML feeds. See https://golang.org/pkg/encoding/xml/#Decoder
Diffstat (limited to 'reader/rss')
-rw-r--r-- reader/rss/parser.go 1
-rw-r--r-- reader/rss/parser_test.go 19
2 files changed, 20 insertions, 0 deletions
diff --git a/reader/rss/parser.go b/reader/rss/parser.go
index 2b46405..79bd144 100644
--- a/reader/rss/parser.go
+++ b/reader/rss/parser.go
@@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rssFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
+ decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index 72cec4e..e604927 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
+
+func TestParseWithInvalidCharacterEntity(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+ <channel>
+ <link>https://example.org/a&b</link>
+ <title>Example Feed</title>
+ </channel>
+ </rss>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "https://example.org/a&b" {
+ t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
+ }
+}