about summary refs log tree commit diff homepage
path: root/reader
diff options
context:
space:
mode:
author: Frédéric Guillot <fred@miniflux.net>, 2019-09-18 22:27:25 -0700
committer: Frédéric Guillot <fred@miniflux.net>, 2019-09-18 22:45:56 -0700
commit: 36d773223481dd42d31499b3ea73e6999ff9f58e (patch)
tree: a122e207303b2fa33b334dbe19e57a508a955a25 /reader
parent: ca48f7612ada5d64d1097c40321a919eb28eb2bf (diff)
Disable strict XML parsing
This change should improve parsing of broken XML feeds. See https://golang.org/pkg/encoding/xml/#Decoder
Diffstat (limited to 'reader')
-rw-r--r-- reader/atom/parser.go 1
-rw-r--r-- reader/atom/parser_test.go 19
-rw-r--r-- reader/opml/parser.go 1
-rw-r--r-- reader/opml/parser_test.go 34
-rw-r--r-- reader/rdf/parser.go 1
-rw-r--r-- reader/rdf/parser_test.go 19
-rw-r--r-- reader/rss/parser.go 1
-rw-r--r-- reader/rss/parser_test.go 19
8 files changed, 95 insertions, 0 deletions
diff --git a/reader/atom/parser.go b/reader/atom/parser.go
index 85be4b5..4749c1a 100644
--- a/reader/atom/parser.go
+++ b/reader/atom/parser.go
@@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
atomFeed := new(atomFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
+ decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(atomFeed)
diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go
index dc42575..746c767 100644
--- a/reader/atom/parser_test.go
+++ b/reader/atom/parser_test.go
@@ -577,3 +577,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
+
+func TestParseWithInvalidCharacterEntity(t *testing.T) {
+ data := `
+ <?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/a&b"/>
+ </feed>
+ `
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "http://example.org/a&b" {
+ t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
+ }
+}
diff --git a/reader/opml/parser.go b/reader/opml/parser.go
index 1bdec20..97974bb 100644
--- a/reader/opml/parser.go
+++ b/reader/opml/parser.go
@@ -17,6 +17,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
feeds := new(opml)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
+ decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feeds)
diff --git a/reader/opml/parser_test.go b/reader/opml/parser_test.go
index f6ce634..6c09db8 100644
--- a/reader/opml/parser_test.go
+++ b/reader/opml/parser_test.go
@@ -193,6 +193,40 @@ func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) {
}
}
}
+
+func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) {
+ data := `<?xml version="1.0"?>
+ <opml version="1.0">
+ <head>
+ <title>mySubscriptions.opml</title>
+ </head>
+ <body>
+ <outline title="Feed 1">
+ <outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline>
+ </outline>
+ </body>
+ </opml>
+ `
+
+ var expected SubcriptionList
+ expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""})
+
+ subscriptions, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ if len(subscriptions) != 1 {
+ t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 1)
+ }
+
+ for i := 0; i < len(subscriptions); i++ {
+ if !subscriptions[i].Equals(expected[i]) {
+ t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
+ }
+ }
+}
+
func TestParseInvalidXML(t *testing.T) {
data := `garbage`
_, err := Parse(bytes.NewBufferString(data))
diff --git a/reader/rdf/parser.go b/reader/rdf/parser.go
index f942317..861ce8c 100644
--- a/reader/rdf/parser.go
+++ b/reader/rdf/parser.go
@@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rdfFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
+ decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)
diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go
index 734b763..4f3d033 100644
--- a/reader/rdf/parser_test.go
+++ b/reader/rdf/parser_test.go
@@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
+
+func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
+ <channel>
+ <title>Example Feed</title>
+ <link>http://example.org/a&b</link>
+ </channel>
+ </rdf:RDF>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "http://example.org/a&b" {
+ t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
+ }
+}
diff --git a/reader/rss/parser.go b/reader/rss/parser.go
index 2b46405..79bd144 100644
--- a/reader/rss/parser.go
+++ b/reader/rss/parser.go
@@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rssFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
+ decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index 72cec4e..e604927 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
+
+func TestParseWithInvalidCharacterEntity(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+ <channel>
+ <link>https://example.org/a&b</link>
+ <title>Example Feed</title>
+ </channel>
+ </rss>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "https://example.org/a&b" {
+ t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
+ }
+}