diff options
author | Frédéric Guillot <fred@miniflux.net> | 2020-01-04 15:18:24 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2020-01-04 15:54:16 -0800 |
commit | bf632fad2e19e9ece4db5957f05727f373541917 (patch) | |
tree | 34c8f90d920f55892c10a25bba47f07c580dfe74 /reader/rss | |
parent | 8cebd985a267f6fbcc363672ca81780dd5407eff (diff) |
Allow only absolute URLs in comments URL
Some feeds are using invalid URLs (random text).
Diffstat (limited to 'reader/rss')
-rw-r--r-- | reader/rss/parser_test.go | 25 | ||||
-rw-r--r-- | reader/rss/rss.go | 7 |
2 files changed, 31 insertions, 1 deletion
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index dd1d261..b7ebd0a 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) {
 	}
 }
 
+func TestParseEntryWithInvalidCommentsURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<title>Item 1</title>
+				<link>https://example.org/item1</link>
+				<comments>
+					Some text
+				</comments>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].CommentsURL != "" {
+		t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL)
+	}
+}
+
 func TestParseInvalidXml(t *testing.T) {
 	data := `garbage`
 	_, err := Parse(bytes.NewBufferString(data))
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fd120cb..172f6f8 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
 func (r *rssItem) entryCommentsURL() string {
 	for _, commentLink := range r.CommentLinks {
 		if commentLink.XMLName.Space == "" {
-			return strings.TrimSpace(commentLink.Data)
+			commentsURL := strings.TrimSpace(commentLink.Data)
+			// The comments URL is supposed to be absolute (some feeds publishes incorrect comments URL)
+			// See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
+			if url.IsAbsoluteURL(commentsURL) {
+				return commentsURL
+			}
 		}
 	}
 