From cf8af56a994ba8ad77f90cbd66d6ff3219493676 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Mon, 20 Nov 2017 15:15:10 -0800 Subject: Handle RSS feeds without entry links --- reader/feed/rss/parser.go | 11 ++++--- reader/feed/rss/parser_test.go | 25 +++++++++++++++ reader/feed/rss/rss.go | 71 ++++++++++++++++++++++-------------------- 3 files changed, 69 insertions(+), 38 deletions(-) (limited to 'reader/feed') diff --git a/reader/feed/rss/parser.go b/reader/feed/rss/parser.go index 52310e4..247c789 100644 --- a/reader/feed/rss/parser.go +++ b/reader/feed/rss/parser.go @@ -7,22 +7,23 @@ package rss import ( "encoding/xml" "fmt" - "github.com/miniflux/miniflux2/model" "io" + "github.com/miniflux/miniflux2/model" + "golang.org/x/net/html/charset" ) // Parse returns a normalized feed struct. func Parse(data io.Reader) (*model.Feed, error) { - rssFeed := new(RssFeed) + feed := new(rssFeed) decoder := xml.NewDecoder(data) decoder.CharsetReader = charset.NewReaderLabel - err := decoder.Decode(rssFeed) + err := decoder.Decode(feed) if err != nil { - return nil, fmt.Errorf("Unable to parse RSS feed: %v", err) + return nil, fmt.Errorf("unable to parse RSS feed: %v", err) } - return rssFeed.Transform(), nil + return feed.Transform(), nil } diff --git a/reader/feed/rss/parser_test.go b/reader/feed/rss/parser_test.go index 42dc965..8265da4 100644 --- a/reader/feed/rss/parser_test.go +++ b/reader/feed/rss/parser_test.go @@ -136,6 +136,31 @@ func TestParseEntryWithoutTitle(t *testing.T) { } } +func TestParseEntryWithoutLink(t *testing.T) { + data := ` + + + https://example.org/ + + 1234 + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].URL != "https://example.org/" { + t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" { + t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) + } +} + func TestParseFeedURLWithAtomLink(t *testing.T) { data := ` diff --git a/reader/feed/rss/rss.go b/reader/feed/rss/rss.go index d88ed6b..fd28943 100644 --- a/reader/feed/rss/rss.go +++ b/reader/feed/rss/rss.go @@ -6,38 +6,39 @@ package rss import ( "encoding/xml" - "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/model" - "github.com/miniflux/miniflux2/reader/feed/date" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" "log" "path" "strconv" "strings" "time" -) -type RssLink struct { - XMLName xml.Name - Data string `xml:",chardata"` - Href string `xml:"href,attr"` -} + "github.com/miniflux/miniflux2/helper" + "github.com/miniflux/miniflux2/model" + "github.com/miniflux/miniflux2/reader/feed/date" + "github.com/miniflux/miniflux2/reader/processor" + "github.com/miniflux/miniflux2/reader/sanitizer" +) -type RssFeed struct { +type rssFeed struct { XMLName xml.Name `xml:"rss"` Version string `xml:"version,attr"` Title string `xml:"channel>title"` - Links []RssLink `xml:"channel>link"` + Links []rssLink `xml:"channel>link"` Language string `xml:"channel>language"` Description string `xml:"channel>description"` PubDate string `xml:"channel>pubDate"` ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"` - Items []RssItem `xml:"channel>item"` + Items []rssItem `xml:"channel>item"` } -type RssItem struct { - Guid string `xml:"guid"` +type rssLink struct { + XMLName xml.Name + Data string `xml:",chardata"` + Href string `xml:"href,attr"` +} + +type rssItem struct { + GUID string `xml:"guid"` Title string `xml:"title"` Link string `xml:"link"` OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"` @@ -45,25 +46,25 @@ type RssItem struct { Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` PubDate string `xml:"pubDate"` Date string `xml:"http://purl.org/dc/elements/1.1/ date"` - Authors []RssAuthor `xml:"author"` + Authors []rssAuthor `xml:"author"` Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"` - Enclosures []RssEnclosure `xml:"enclosure"` + Enclosures []rssEnclosure `xml:"enclosure"` OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"` } -type RssAuthor struct { +type rssAuthor struct { XMLName xml.Name Data string `xml:",chardata"` Name string `xml:"name"` } -type RssEnclosure struct { - Url string `xml:"url,attr"` +type rssEnclosure struct { + URL string `xml:"url,attr"` Type string `xml:"type,attr"` Length string `xml:"length,attr"` } -func (r *RssFeed) GetSiteURL() string { +func (r *rssFeed) GetSiteURL() string { for _, elem := range r.Links { if elem.XMLName.Space == "" { return elem.Data @@ -73,7 +74,7 @@ func (r *RssFeed) GetSiteURL() string { return "" } -func (r *RssFeed) GetFeedURL() string { +func (r *rssFeed) GetFeedURL() string { for _, elem := range r.Links { if elem.XMLName.Space == "http://www.w3.org/2005/Atom" { return elem.Href @@ -83,7 +84,7 @@ func (r *RssFeed) GetFeedURL() string { return "" } -func (r *RssFeed) Transform() *model.Feed { +func (r *rssFeed) Transform() *model.Feed { feed := new(model.Feed) feed.SiteURL = r.GetSiteURL() feed.FeedURL = r.GetFeedURL() @@ -101,12 +102,16 @@ func (r *RssFeed) Transform() *model.Feed { } entry.Author = sanitizer.StripTags(entry.Author) + if entry.URL == "" { + entry.URL = feed.SiteURL + } + feed.Entries = append(feed.Entries, entry) } return feed } -func (i *RssItem) GetDate() time.Time { +func (i *rssItem) GetDate() time.Time { value := i.PubDate if i.Date != "" { value = i.Date @@ -125,7 +130,7 @@ func (i *RssItem) GetDate() time.Time { return time.Now() } -func (i *RssItem) GetAuthor() string { +func (i *rssItem) GetAuthor() string { for _, element := range i.Authors { if element.Name != "" { return element.Name @@ -139,8 +144,8 @@ func (i *RssItem) GetAuthor() string { return i.Creator } -func (i *RssItem) GetHash() string { - for _, value := range []string{i.Guid, i.Link} { +func (i *rssItem) GetHash() string { + for _, value := range []string{i.GUID, i.Link} { if value != "" { return helper.Hash(value) } @@ -149,7 +154,7 @@ func (i *RssItem) GetHash() string { return "" } -func (i *RssItem) GetContent() string { +func (i *rssItem) GetContent() string { if i.Content != "" { return i.Content } @@ -157,7 +162,7 @@ func (i *RssItem) GetContent() string { return i.Description } -func (i *RssItem) GetURL() string { +func (i *rssItem) GetURL() string { if i.OriginalLink != "" { return i.OriginalLink } @@ -165,12 +170,12 @@ func (i *RssItem) GetURL() string { return i.Link } -func (i *RssItem) GetEnclosures() model.EnclosureList { +func (i *rssItem) GetEnclosures() model.EnclosureList { enclosures := make(model.EnclosureList, 0) for _, enclosure := range i.Enclosures { length, _ := strconv.Atoi(enclosure.Length) - enclosureURL := enclosure.Url + enclosureURL := enclosure.URL if i.OrigEnclosureLink != "" { filename := path.Base(i.OrigEnclosureLink) @@ -189,7 +194,7 @@ func (i *RssItem) GetEnclosures() model.EnclosureList { return enclosures } -func (i *RssItem) Transform() *model.Entry { +func (i *rssItem) Transform() *model.Entry { entry := new(model.Entry) entry.URL = i.GetURL() entry.Date = i.GetDate() -- cgit v1.2.3