diff options
Diffstat (limited to 'reader/rss/rss.go')
-rw-r--r-- | reader/rss/rss.go | 264 |
1 files changed, 148 insertions, 116 deletions
diff --git a/reader/rss/rss.go b/reader/rss/rss.go index fb0c5bf..fd120cb 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -20,16 +20,86 @@ import ( "miniflux.app/url" ) +// Specs: https://cyber.harvard.edu/rss/rss.html type rssFeed struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - Title string `xml:"channel>title"` - Links []rssLink `xml:"channel>link"` - Language string `xml:"channel>language"` - Description string `xml:"channel>description"` - PubDate string `xml:"channel>pubDate"` - ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"` - Items []rssItem `xml:"channel>item"` + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + Title string `xml:"channel>title"` + Links []rssLink `xml:"channel>link"` + Language string `xml:"channel>language"` + Description string `xml:"channel>description"` + PubDate string `xml:"channel>pubDate"` + ManagingEditor string `xml:"channel>managingEditor"` + Webmaster string `xml:"channel>webMaster"` + Items []rssItem `xml:"channel>item"` + PodcastFeedElement +} + +func (r *rssFeed) Transform() *model.Feed { + feed := new(model.Feed) + feed.SiteURL = r.siteURL() + feed.FeedURL = r.feedURL() + feed.Title = strings.TrimSpace(r.Title) + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, item := range r.Items { + entry := item.Transform() + if entry.Author == "" { + entry.Author = r.feedAuthor() + } + entry.Author = sanitizer.StripTags(entry.Author) + + if entry.URL == "" { + entry.URL = feed.SiteURL + } else { + entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL) + if err == nil { + entry.URL = entryURL + } + } + + if entry.Title == "" { + entry.Title = entry.URL + } + + feed.Entries = append(feed.Entries, entry) + } + + return feed +} + +func (r *rssFeed) siteURL() string { + for _, element := range r.Links { + if element.XMLName.Space == "" { + return strings.TrimSpace(element.Data) + } + } + + return "" +} + +func (r *rssFeed) feedURL() string { + for _, element := range r.Links { + if element.XMLName.Space == "http://www.w3.org/2005/Atom" { + return strings.TrimSpace(element.Href) + } + } + + return "" +} + +func (r rssFeed) feedAuthor() string { + author := r.PodcastAuthor() + switch { + case r.ManagingEditor != "": + author = r.ManagingEditor + case r.Webmaster != "": + author = r.Webmaster + } + return strings.TrimSpace(author) } type rssLink struct { @@ -48,6 +118,7 @@ type rssAuthor struct { XMLName xml.Name Data string `xml:",chardata"` Name string `xml:"name"` + Email string `xml:"email"` Inner string `xml:",innerxml"` } @@ -66,83 +137,37 @@ func (enclosure *rssEnclosure) Size() int64 { } type rssItem struct { - GUID string `xml:"guid"` - Title string `xml:"title"` - Links []rssLink `xml:"link"` - OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"` - CommentLinks []rssCommentLink `xml:"comments"` - Description string `xml:"description"` - EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` - PubDate string `xml:"pubDate"` - Date string `xml:"http://purl.org/dc/elements/1.1/ date"` - Authors []rssAuthor `xml:"author"` - Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"` - EnclosureLinks []rssEnclosure `xml:"enclosure"` - OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"` + GUID string `xml:"guid"` + Title string `xml:"title"` + Links []rssLink `xml:"link"` + Description string `xml:"description"` + PubDate string `xml:"pubDate"` + Authors []rssAuthor `xml:"author"` + CommentLinks []rssCommentLink `xml:"comments"` + EnclosureLinks []rssEnclosure `xml:"enclosure"` + DublinCoreElement + FeedBurnerElement + PodcastEntryElement media.Element } -func (r *rssFeed) SiteURL() string { - for _, element := range r.Links { - if element.XMLName.Space == "" { - return strings.TrimSpace(element.Data) - } - } - - return "" -} - -func (r *rssFeed) FeedURL() string { - for _, element := range r.Links { - if element.XMLName.Space == "http://www.w3.org/2005/Atom" { - return strings.TrimSpace(element.Href) - } - } - - return "" -} - -func (r *rssFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.SiteURL = r.SiteURL() - feed.FeedURL = r.FeedURL() - feed.Title = strings.TrimSpace(r.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, item := range r.Items { - entry := item.Transform() - - if entry.Author == "" && r.ItunesAuthor != "" { - entry.Author = r.ItunesAuthor - } - entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author)) - - if entry.URL == "" { - entry.URL = feed.SiteURL - } else { - entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL) - if err == nil { - entry.URL = entryURL - } - } - - if entry.Title == "" { - entry.Title = entry.URL - } - - feed.Entries = append(feed.Entries, entry) - } - - return feed +func (r *rssItem) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = r.entryURL() + entry.CommentsURL = r.entryCommentsURL() + entry.Date = r.entryDate() + entry.Author = r.entryAuthor() + entry.Hash = r.entryHash() + entry.Content = r.entryContent() + entry.Title = r.entryTitle() + entry.Enclosures = r.entryEnclosures() + return entry } -func (r *rssItem) PublishedDate() time.Time { +func (r *rssItem) entryDate() time.Time { value := r.PubDate - if r.Date != "" { - value = r.Date + if r.DublinCoreDate != "" { + value = r.DublinCoreDate } if value != "" { @@ -158,22 +183,37 @@ func (r *rssItem) PublishedDate() time.Time { return time.Now() } -func (r *rssItem) Author() string { - for _, element := range r.Authors { - if element.Name != "" { - return element.Name +func (r *rssItem) entryAuthor() string { + author := "" + + for _, rssAuthor := range r.Authors { + switch rssAuthor.XMLName.Space { + case "http://www.itunes.com/dtds/podcast-1.0.dtd", "http://www.google.com/schemas/play-podcasts/1.0": + author = rssAuthor.Data + case "http://www.w3.org/2005/Atom": + if rssAuthor.Name != "" { + author = rssAuthor.Name + } else if rssAuthor.Email != "" { + author = rssAuthor.Email + } + default: + if rssAuthor.Name != "" { + author = rssAuthor.Name + } else { + author = rssAuthor.Inner + } } + } - if element.Inner != "" { - return element.Inner - } + if author == "" { + author = r.DublinCoreCreator } - return r.Creator + return strings.TrimSpace(author) } -func (r *rssItem) Hash() string { - for _, value := range []string{r.GUID, r.URL()} { +func (r *rssItem) entryHash() string { + for _, value := range []string{r.GUID, r.entryURL()} { if value != "" { return crypto.Hash(value) } @@ -182,17 +222,22 @@ func (r *rssItem) Hash() string { return "" } -func (r *rssItem) Content() string { - if r.EncodedContent != "" { - return r.EncodedContent - } +func (r *rssItem) entryTitle() string { + return strings.TrimSpace(sanitizer.StripTags(r.Title)) +} - return r.Description +func (r *rssItem) entryContent() string { + for _, value := range []string{r.DublinCoreContent, r.Description, r.PodcastDescription()} { + if value != "" { + return value + } + } + return "" } -func (r *rssItem) URL() string { - if r.OriginalLink != "" { - return r.OriginalLink +func (r *rssItem) entryURL() string { + if r.FeedBurnerLink != "" { + return r.FeedBurnerLink } for _, link := range r.Links { @@ -208,7 +253,7 @@ func (r *rssItem) URL() string { return "" } -func (r *rssItem) Enclosures() model.EnclosureList { +func (r *rssItem) entryEnclosures() model.EnclosureList { enclosures := make(model.EnclosureList, 0) duplicates := make(map[string]bool, 0) @@ -226,10 +271,10 @@ func (r *rssItem) Enclosures() model.EnclosureList { for _, enclosure := range r.EnclosureLinks { enclosureURL := enclosure.URL - if r.OrigEnclosureLink != "" { - filename := path.Base(r.OrigEnclosureLink) + if r.FeedBurnerEnclosureLink != "" { + filename := path.Base(r.FeedBurnerEnclosureLink) if strings.Contains(enclosureURL, filename) { - enclosureURL = r.OrigEnclosureLink + enclosureURL = r.FeedBurnerEnclosureLink } } @@ -269,7 +314,7 @@ func (r *rssItem) Enclosures() model.EnclosureList { return enclosures } -func (r *rssItem) CommentsURL() string { +func (r *rssItem) entryCommentsURL() string { for _, commentLink := range r.CommentLinks { if commentLink.XMLName.Space == "" { return strings.TrimSpace(commentLink.Data) @@ -279,19 +324,6 @@ func (r *rssItem) CommentsURL() string { return "" } -func (r *rssItem) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = r.URL() - entry.CommentsURL = r.CommentsURL() - entry.Date = r.PublishedDate() - entry.Author = r.Author() - entry.Hash = r.Hash() - entry.Content = r.Content() - entry.Title = strings.TrimSpace(r.Title) - entry.Enclosures = r.Enclosures() - return entry -} - func isValidLinkRelation(rel string) bool { switch rel { case "", "alternate", "enclosure", "related", "self", "via": |