aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Frédéric Guillot <fred@miniflux.net> 2020-01-04 15:18:24 -0800
committer Frédéric Guillot <fred@miniflux.net> 2020-01-04 15:54:16 -0800
commit bf632fad2e19e9ece4db5957f05727f373541917 (patch)
tree 34c8f90d920f55892c10a25bba47f07c580dfe74
parent 8cebd985a267f6fbcc363672ca81780dd5407eff (diff)
Allow only absolute URLs in comments URL
Some feeds are using invalid URLs (random text).
-rw-r--r-- reader/atom/atom_10.go 11
-rw-r--r-- reader/atom/atom_10_test.go 40
-rw-r--r-- reader/rss/parser_test.go 25
-rw-r--r-- reader/rss/rss.go 7
-rw-r--r-- url/url.go 9
-rw-r--r-- url/url_test.go 15
6 files changed, 105 insertions, 2 deletions
diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go
index 099cbed..708cc9f 100644
--- a/reader/atom/atom_10.go
+++ b/reader/atom/atom_10.go
@@ -84,7 +84,7 @@ func (a *atom10Entry) Transform() *model.Entry {
entry.Content = a.entryContent()
entry.Title = a.entryTitle()
entry.Enclosures = a.entryEnclosures()
- entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html")
+ entry.CommentsURL = a.entryCommentsURL()
return entry
}
@@ -194,6 +194,15 @@ func (a *atom10Entry) entryEnclosures() model.EnclosureList {
return enclosures
}
+// entryCommentsURL returns the comments URL of the entry.
+//
+// The candidate is the link selected by
+// a.Links.firstLinkWithRelationAndType("replies", "text/html"). It is
+// returned only when url.IsAbsoluteURL accepts it; otherwise the result is
+// the empty string.
+//
+// See https://tools.ietf.org/html/rfc4685#section-3
+func (a *atom10Entry) entryCommentsURL() string {
+ commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html")
+ if url.IsAbsoluteURL(commentsURL) {
+ return commentsURL
+ }
+ return ""
+}
+
type atom10Text struct {
Type string `xml:"type,attr"`
Data string `xml:",chardata"`
diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go
index 63127c4..d614691 100644
--- a/reader/atom/atom_10_test.go
+++ b/reader/atom/atom_10_test.go
@@ -777,3 +777,43 @@ func TestParseRepliesLinkRelation(t *testing.T) {
t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
}
}
+
+// TestAbsoluteCommentsURL checks that an Atom "replies" link whose href is
+// not an absolute URL ("invalid url") produces an empty CommentsURL, while
+// the entry URL is still read from the regular link.
+func TestAbsoluteCommentsURL(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom"
+ xmlns:thr="http://purl.org/syndication/thread/1.0">
+ <id>http://www.example.org/myfeed</id>
+ <title>My Example Feed</title>
+ <updated>2005-07-28T12:00:00Z</updated>
+ <link href="http://www.example.org/myfeed" />
+ <author><name>James</name></author>
+ <entry>
+ <id>tag:entries.com,2005:1</id>
+ <title>My original entry</title>
+ <updated>2006-03-01T12:12:12Z</updated>
+ <link href="http://www.example.org/entries/1" />
+ <link rel="replies"
+ type="text/html"
+ href="invalid url"
+ thr:count="10" thr:updated="2005-07-28T12:10:00Z" />
+ <summary>This is my original entry</summary>
+ </entry>
+ </feed>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // NOTE(review): t.Errorf does not stop the test, so the feed.Entries[0]
+ // accesses below would panic if no entry had been parsed.
+ if len(feed.Entries) != 1 {
+ t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+ t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+ }
+
+ // The non-absolute href must be discarded rather than stored as-is.
+ if feed.Entries[0].CommentsURL != "" {
+ t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
+ }
+}
diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index dd1d261..b7ebd0a 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) {
}
}
+// TestParseEntryWithInvalidCommentsURL checks that an RSS <comments> element
+// holding plain text ("Some text") instead of a URL results in an empty
+// CommentsURL on the parsed entry.
+func TestParseEntryWithInvalidCommentsURL(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+ <channel>
+ <link>https://example.org/</link>
+ <item>
+ <title>Item 1</title>
+ <link>https://example.org/item1</link>
+ <comments>
+ Some text
+ </comments>
+ </item>
+ </channel>
+ </rss>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // %q makes leftover whitespace or text visible if the value is not empty.
+ if feed.Entries[0].CommentsURL != "" {
+ t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL)
+ }
+}
+
func TestParseInvalidXml(t *testing.T) {
data := `garbage`
_, err := Parse(bytes.NewBufferString(data))
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fd120cb..172f6f8 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
func (r *rssItem) entryCommentsURL() string {
for _, commentLink := range r.CommentLinks {
if commentLink.XMLName.Space == "" {
- return strings.TrimSpace(commentLink.Data)
+ commentsURL := strings.TrimSpace(commentLink.Data)
+ // The comments URL is supposed to be absolute (some feeds publish incorrect comments URLs).
+ // See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
+ if url.IsAbsoluteURL(commentsURL) {
+ return commentsURL
+ }
}
}
diff --git a/url/url.go b/url/url.go
index b02348f..d0c627e 100644
--- a/url/url.go
+++ b/url/url.go
@@ -11,6 +11,15 @@ import (
"strings"
)
+// IsAbsoluteURL returns true if the link is absolute.
+//
+// The link is parsed with url.Parse. Input that fails to parse is reported
+// as not absolute; otherwise the result is whatever IsAbs returns for the
+// parsed URL.
+func IsAbsoluteURL(link string) bool {
+ u, err := url.Parse(link)
+ if err != nil {
+ return false
+ }
+ return u.IsAbs()
+}
+
// AbsoluteURL converts the input URL as absolute URL if necessary.
func AbsoluteURL(baseURL, input string) (string, error) {
if strings.HasPrefix(input, "//") {
diff --git a/url/url_test.go b/url/url_test.go
index 56b6e13..ea488cf 100644
--- a/url/url_test.go
+++ b/url/url_test.go
@@ -6,6 +6,21 @@ package url // import "miniflux.app/url"
import "testing"
+// TestIsAbsoluteURL runs IsAbsoluteURL over a table that maps each input to
+// its expected result: an https URL and a magnet link are expected to be
+// absolute, free text ("invalid url") is not.
+func TestIsAbsoluteURL(t *testing.T) {
+ scenarios := map[string]bool{
+ "https://example.org/file.pdf": true,
+ "magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7": true,
+ "invalid url": false,
+ }
+
+ for input, expected := range scenarios {
+ actual := IsAbsoluteURL(input)
+ if actual != expected {
+ t.Errorf(`Unexpected result, got %v instead of %v for %q`, actual, expected, input)
+ }
+ }
+}
+
func TestAbsoluteURL(t *testing.T) {
scenarios := [][]string{
[]string{"https://example.org/path/file.ext", "https://example.org/folder/", "/path/file.ext"},