diff options
author | Frédéric Guillot <fred@miniflux.net> | 2020-01-04 15:18:24 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2020-01-04 15:54:16 -0800 |
commit | bf632fad2e19e9ece4db5957f05727f373541917 (patch) | |
tree | 34c8f90d920f55892c10a25bba47f07c580dfe74 | |
parent | 8cebd985a267f6fbcc363672ca81780dd5407eff (diff) |
Allow only absolute URLs in comments URL
Some feeds are using invalid URLs (random text).
-rw-r--r-- | reader/atom/atom_10.go | 11 | ||||
-rw-r--r-- | reader/atom/atom_10_test.go | 40 | ||||
-rw-r--r-- | reader/rss/parser_test.go | 25 | ||||
-rw-r--r-- | reader/rss/rss.go | 7 | ||||
-rw-r--r-- | url/url.go | 9 | ||||
-rw-r--r-- | url/url_test.go | 15 |
6 files changed, 105 insertions, 2 deletions
diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index 099cbed..708cc9f 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -84,7 +84,7 @@ func (a *atom10Entry) Transform() *model.Entry { entry.Content = a.entryContent() entry.Title = a.entryTitle() entry.Enclosures = a.entryEnclosures() - entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html") + entry.CommentsURL = a.entryCommentsURL() return entry } @@ -194,6 +194,15 @@ func (a *atom10Entry) entryEnclosures() model.EnclosureList { return enclosures } +// See https://tools.ietf.org/html/rfc4685#section-3 +func (a *atom10Entry) entryCommentsURL() string { + commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html") + if url.IsAbsoluteURL(commentsURL) { + return commentsURL + } + return "" +} + type atom10Text struct { Type string `xml:"type,attr"` Data string `xml:",chardata"` diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go index 63127c4..d614691 100644 --- a/reader/atom/atom_10_test.go +++ b/reader/atom/atom_10_test.go @@ -777,3 +777,43 @@ func TestParseRepliesLinkRelation(t *testing.T) { t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) } } + +func TestAbsoluteCommentsURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" + xmlns:thr="http://purl.org/syndication/thread/1.0"> + <id>http://www.example.org/myfeed</id> + <title>My Example Feed</title> + <updated>2005-07-28T12:00:00Z</updated> + <link href="http://www.example.org/myfeed" /> + <author><name>James</name></author> + <entry> + <id>tag:entries.com,2005:1</id> + <title>My original entry</title> + <updated>2006-03-01T12:12:12Z</updated> + <link href="http://www.example.org/entries/1" /> + <link rel="replies" + type="text/html" + href="invalid url" + thr:count="10" thr:updated="2005-07-28T12:10:00Z" /> + <summary>This is my original entry</summary> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].CommentsURL != "" { + t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) + } +} diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index dd1d261..b7ebd0a 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) { } } +func TestParseEntryWithInvalidCommentsURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> + <channel> + <link>https://example.org/</link> + <item> + <title>Item 1</title> + <link>https://example.org/item1</link> + <comments> + Some text + </comments> + </item> + </channel> + </rss>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].CommentsURL != "" { + t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL) + } +} + func TestParseInvalidXml(t *testing.T) { data := `garbage` _, err := Parse(bytes.NewBufferString(data)) diff --git a/reader/rss/rss.go b/reader/rss/rss.go index fd120cb..172f6f8 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList { func (r *rssItem) entryCommentsURL() string { for _, commentLink := range r.CommentLinks { if commentLink.XMLName.Space == "" { - return strings.TrimSpace(commentLink.Data) + commentsURL := strings.TrimSpace(commentLink.Data) + // The comments URL is supposed to be absolute (some feeds publishes incorrect comments URL) + // See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt + if url.IsAbsoluteURL(commentsURL) { + return commentsURL + } } } @@ -11,6 +11,15 @@ import ( "strings" ) +// IsAbsoluteURL returns true if the link is absolute. +func IsAbsoluteURL(link string) bool { + u, err := url.Parse(link) + if err != nil { + return false + } + return u.IsAbs() +} + // AbsoluteURL converts the input URL as absolute URL if necessary. func AbsoluteURL(baseURL, input string) (string, error) { if strings.HasPrefix(input, "//") { diff --git a/url/url_test.go b/url/url_test.go index 56b6e13..ea488cf 100644 --- a/url/url_test.go +++ b/url/url_test.go @@ -6,6 +6,21 @@ package url // import "miniflux.app/url" import "testing" +func TestIsAbsoluteURL(t *testing.T) { + scenarios := map[string]bool{ + "https://example.org/file.pdf": true, + "magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7": true, + "invalid url": false, + } + + for input, expected := range scenarios { + actual := IsAbsoluteURL(input) + if actual != expected { + t.Errorf(`Unexpected result, got %v instead of %v for %q`, actual, expected, input) + } + } +} + func TestAbsoluteURL(t *testing.T) { scenarios := [][]string{ []string{"https://example.org/path/file.ext", "https://example.org/folder/", "/path/file.ext"}, |