diff options
author | Frédéric Guillot <fred@miniflux.net> | 2019-09-18 22:27:25 -0700 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2019-09-18 22:45:56 -0700 |
commit | 36d773223481dd42d31499b3ea73e6999ff9f58e (patch) | |
tree | a122e207303b2fa33b334dbe19e57a508a955a25 /reader | |
parent | ca48f7612ada5d64d1097c40321a919eb28eb2bf (diff) |
Disable strict XML parsing
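This change should improve parsing of broken XML feeds: with `Decoder.Strict` set to false, the Atom, OPML, RDF, and RSS parsers accept common mistakes such as an unescaped `&` in a URL instead of failing.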
See https://golang.org/pkg/encoding/xml/#Decoder
Diffstat (limited to 'reader')
-rw-r--r-- | reader/atom/parser.go | 1 | ||||
-rw-r--r-- | reader/atom/parser_test.go | 19 | ||||
-rw-r--r-- | reader/opml/parser.go | 1 | ||||
-rw-r--r-- | reader/opml/parser_test.go | 34 | ||||
-rw-r--r-- | reader/rdf/parser.go | 1 | ||||
-rw-r--r-- | reader/rdf/parser_test.go | 19 | ||||
-rw-r--r-- | reader/rss/parser.go | 1 | ||||
-rw-r--r-- | reader/rss/parser_test.go | 19 |
8 files changed, 95 insertions, 0 deletions
diff --git a/reader/atom/parser.go b/reader/atom/parser.go index 85be4b5..4749c1a 100644 --- a/reader/atom/parser.go +++ b/reader/atom/parser.go @@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) { atomFeed := new(atomFeed) decoder := xml.NewDecoder(data) decoder.Entity = xml.HTMLEntity + decoder.Strict = false decoder.CharsetReader = encoding.CharsetReader err := decoder.Decode(atomFeed) diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index dc42575..746c767 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -577,3 +577,22 @@ func TestParseWithHTMLEntity(t *testing.T) { t.Errorf(`Incorrect title, got: %q`, feed.Title) } } + +func TestParseWithInvalidCharacterEntity(t *testing.T) { + data := ` + <?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/a&b"/> + </feed> + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "http://example.org/a&b" { + t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) + } +} diff --git a/reader/opml/parser.go b/reader/opml/parser.go index 1bdec20..97974bb 100644 --- a/reader/opml/parser.go +++ b/reader/opml/parser.go @@ -17,6 +17,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) { feeds := new(opml) decoder := xml.NewDecoder(data) decoder.Entity = xml.HTMLEntity + decoder.Strict = false decoder.CharsetReader = encoding.CharsetReader err := decoder.Decode(feeds) diff --git a/reader/opml/parser_test.go b/reader/opml/parser_test.go index f6ce634..6c09db8 100644 --- a/reader/opml/parser_test.go +++ b/reader/opml/parser_test.go @@ -193,6 +193,40 @@ func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) { } } } + +func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) { + data := `<?xml version="1.0"?> + <opml version="1.0"> + <head> + <title>mySubscriptions.opml</title> + </head> + 
<body> + <outline title="Feed 1"> + <outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline> + </outline> + </body> + </opml> + ` + + var expected SubcriptionList + expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""}) + + subscriptions, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if len(subscriptions) != 1 { + t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 1) + } + + for i := 0; i < len(subscriptions); i++ { + if !subscriptions[i].Equals(expected[i]) { + t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i]) + } + } +} + func TestParseInvalidXML(t *testing.T) { data := `garbage` _, err := Parse(bytes.NewBufferString(data)) diff --git a/reader/rdf/parser.go b/reader/rdf/parser.go index f942317..861ce8c 100644 --- a/reader/rdf/parser.go +++ b/reader/rdf/parser.go @@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) { feed := new(rdfFeed) decoder := xml.NewDecoder(data) decoder.Entity = xml.HTMLEntity + decoder.Strict = false decoder.CharsetReader = encoding.CharsetReader err := decoder.Decode(feed) diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go index 734b763..4f3d033 100644 --- a/reader/rdf/parser_test.go +++ b/reader/rdf/parser_test.go @@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) { t.Errorf(`Incorrect title, got: %q`, feed.Title) } } + +func TestParseFeedWithInvalidCharacterEntity(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"> + <channel> + <title>Example Feed</title> + <link>http://example.org/a&b</link> + </channel> + </rdf:RDF>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + 
+ if feed.SiteURL != "http://example.org/a&b" { + t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) + } +} diff --git a/reader/rss/parser.go b/reader/rss/parser.go index 2b46405..79bd144 100644 --- a/reader/rss/parser.go +++ b/reader/rss/parser.go @@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) { feed := new(rssFeed) decoder := xml.NewDecoder(data) decoder.Entity = xml.HTMLEntity + decoder.Strict = false decoder.CharsetReader = encoding.CharsetReader err := decoder.Decode(feed) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 72cec4e..e604927 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) { t.Errorf(`Incorrect title, got: %q`, feed.Title) } } + +func TestParseWithInvalidCharacterEntity(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> + <channel> + <link>https://example.org/a&b</link> + <title>Example Feed</title> + </channel> + </rss>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "https://example.org/a&b" { + t.Errorf(`Incorrect url, got: %q`, feed.SiteURL) + } +} |