From bf632fad2e19e9ece4db5957f05727f373541917 Mon Sep 17 00:00:00 2001
From: Frédéric Guillot
Date: Sat, 4 Jan 2020 15:18:24 -0800
Subject: Allow only absolute URLs in comments URL

Some feeds are using invalid URLs (random text).
---
 reader/rss/parser_test.go | 25 +++++++++++++++++++++++++
 reader/rss/rss.go | 7 ++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'reader/rss')

diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index dd1d261..b7ebd0a 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) {
 	}
 }
 
+func TestParseEntryWithInvalidCommentsURL(t *testing.T) {
+	data := `
+
+
+		https://example.org/
+
+		Item 1
+		https://example.org/item1
+
+		Some text
+
+
+
+	`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].CommentsURL != "" {
+		t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL)
+	}
+}
+
 func TestParseInvalidXml(t *testing.T) {
 	data := `garbage`
 	_, err := Parse(bytes.NewBufferString(data))
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fd120cb..172f6f8 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
 func (r *rssItem) entryCommentsURL() string {
 	for _, commentLink := range r.CommentLinks {
 		if commentLink.XMLName.Space == "" {
-			return strings.TrimSpace(commentLink.Data)
+			commentsURL := strings.TrimSpace(commentLink.Data)
+			// The comments URL is supposed to be absolute (some feeds publishes incorrect comments URL)
+			// See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
+			if url.IsAbsoluteURL(commentsURL) {
+				return commentsURL
+			}
 		}
 	}
 
-- 
cgit v1.2.3