diff options
author | Frédéric Guillot <fred@miniflux.net> | 2019-09-18 22:27:25 -0700 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2019-09-18 22:45:56 -0700 |
commit | 36d773223481dd42d31499b3ea73e6999ff9f58e (patch) | |
tree | a122e207303b2fa33b334dbe19e57a508a955a25 /reader/rdf | |
parent | ca48f7612ada5d64d1097c40321a919eb28eb2bf (diff) |
Disable strict XML parsing
This change should improve parsing of broken XML feeds.
See https://golang.org/pkg/encoding/xml/#Decoder
Diffstat (limited to 'reader/rdf')
-rw-r--r-- | reader/rdf/parser.go | 1 | ||||
-rw-r--r-- | reader/rdf/parser_test.go | 19 |
2 files changed, 20 insertions, 0 deletions
```diff
diff --git a/reader/rdf/parser.go b/reader/rdf/parser.go
index f942317..861ce8c 100644
--- a/reader/rdf/parser.go
+++ b/reader/rdf/parser.go
@@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
 	feed := new(rdfFeed)
 	decoder := xml.NewDecoder(data)
 	decoder.Entity = xml.HTMLEntity
+	decoder.Strict = false
 	decoder.CharsetReader = encoding.CharsetReader
 
 	err := decoder.Decode(feed)
diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go
index 734b763..4f3d033 100644
--- a/reader/rdf/parser_test.go
+++ b/reader/rdf/parser_test.go
@@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) {
 		t.Errorf(`Incorrect title, got: %q`, feed.Title)
 	}
 }
+
+func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
+		<channel>
+			<title>Example Feed</title>
+			<link>http://example.org/a&b</link>
+		</channel>
+	</rdf:RDF>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.SiteURL != "http://example.org/a&b" {
+		t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
+	}
+}
```