aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/rss
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2020-01-04 15:18:24 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2020-01-04 15:54:16 -0800
commitbf632fad2e19e9ece4db5957f05727f373541917 (patch)
tree34c8f90d920f55892c10a25bba47f07c580dfe74 /reader/rss
parent8cebd985a267f6fbcc363672ca81780dd5407eff (diff)
Allow only absolute URLs in comments URL
Some feeds are using invalid URLs (random text).
Diffstat (limited to 'reader/rss')
-rw-r--r--reader/rss/parser_test.go25
-rw-r--r--reader/rss/rss.go7
2 files changed, 31 insertions, 1 deletions
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index dd1d261..b7ebd0a 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) {
}
}
+// TestParseEntryWithInvalidCommentsURL checks that an item whose <comments>
+// element contains free text instead of an absolute URL is parsed with an
+// empty CommentsURL.
+func TestParseEntryWithInvalidCommentsURL(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+ <channel>
+ <link>https://example.org/</link>
+ <item>
+ <title>Item 1</title>
+ <link>https://example.org/item1</link>
+ <comments>
+ Some text
+ </comments>
+ </item>
+ </channel>
+ </rss>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Fail with a readable message instead of panicking on the index below
+ // if the parser returned no entries.
+ if len(feed.Entries) == 0 {
+ t.Fatal("Incorrect number of entries, got: 0")
+ }
+
+ if feed.Entries[0].CommentsURL != "" {
+ t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL)
+ }
+}
+
func TestParseInvalidXml(t *testing.T) {
data := `garbage`
_, err := Parse(bytes.NewBufferString(data))
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fd120cb..172f6f8 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
func (r *rssItem) entryCommentsURL() string {
for _, commentLink := range r.CommentLinks {
if commentLink.XMLName.Space == "" {
- return strings.TrimSpace(commentLink.Data)
+ commentsURL := strings.TrimSpace(commentLink.Data)
+ // The comments URL is supposed to be absolute (some feeds publish invalid comments URLs)
+ // See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
+ if url.IsAbsoluteURL(commentsURL) {
+ return commentsURL
+ }
}
}