about | summary | refs | log | tree | commit | diff | homepage
path: root/reader/rss/rss.go
diff options
context:
space:
mode:
Diffstat (limited to 'reader/rss/rss.go')
-rw-r--r-- reader/rss/rss.go 264
1 files changed, 148 insertions, 116 deletions
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fb0c5bf..fd120cb 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -20,16 +20,86 @@ import (
"miniflux.app/url"
)
+// Specs: https://cyber.harvard.edu/rss/rss.html
type rssFeed struct { // decoding target for an <rss> document; the struct tags name the elements each field is read from
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"channel>title"`
- Links []rssLink `xml:"channel>link"`
- Language string `xml:"channel>language"`
- Description string `xml:"channel>description"`
- PubDate string `xml:"channel>pubDate"`
- ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
- Items []rssItem `xml:"channel>item"`
+ XMLName xml.Name `xml:"rss"` // root <rss> element
+ Version string `xml:"version,attr"` // "version" attribute of the root element
+ Title string `xml:"channel>title"` // channel <title>; trimmed by Transform
+ Links []rssLink `xml:"channel>link"` // channel <link> elements; siteURL and feedURL select among them by XML namespace
+ Language string `xml:"channel>language"` // channel <language>
+ Description string `xml:"channel>description"` // channel <description>
+ PubDate string `xml:"channel>pubDate"` // channel <pubDate>, kept as the raw string
+ ManagingEditor string `xml:"channel>managingEditor"` // channel <managingEditor>; first choice of feedAuthor
+ Webmaster string `xml:"channel>webMaster"` // channel <webMaster>; used by feedAuthor when ManagingEditor is empty
+ Items []rssItem `xml:"channel>item"` // channel <item> elements, each converted by rssItem.Transform
+ PodcastFeedElement // embedded; presumably supplies the PodcastAuthor() that feedAuthor calls — declared outside this view, confirm
+}
+
+// Transform converts the decoded RSS document into a model.Feed.
+//
+// The feed title is the trimmed channel title, or the site URL when that is
+// empty. Each item is converted with rssItem.Transform and then normalised:
+// an empty author is replaced by feedAuthor(), the author is passed through
+// sanitizer.StripTags, an empty URL becomes the site URL, a non-empty URL is
+// resolved against the site URL (the raw value is kept when resolution
+// returns an error), and an empty title becomes the entry URL.
+func (r *rssFeed) Transform() *model.Feed {
+	result := new(model.Feed)
+	result.SiteURL, result.FeedURL = r.siteURL(), r.feedURL()
+
+	if title := strings.TrimSpace(r.Title); title != "" {
+		result.Title = title
+	} else {
+		result.Title = result.SiteURL
+	}
+
+	for _, rssEntry := range r.Items {
+		entry := rssEntry.Transform()
+
+		author := entry.Author
+		if author == "" {
+			author = r.feedAuthor()
+		}
+		entry.Author = sanitizer.StripTags(author)
+
+		switch {
+		case entry.URL == "":
+			entry.URL = result.SiteURL
+		default:
+			// Best effort: keep the URL as written if it cannot be resolved.
+			if absoluteURL, err := url.AbsoluteURL(result.SiteURL, entry.URL); err == nil {
+				entry.URL = absoluteURL
+			}
+		}
+
+		if entry.Title == "" {
+			entry.Title = entry.URL
+		}
+
+		result.Entries = append(result.Entries, entry)
+	}
+
+	return result
+}
+
+// siteURL returns the trimmed character data of the first link element
+// whose XML namespace is empty, or "" when no such link exists.
+func (r *rssFeed) siteURL() string {
+	for i := range r.Links {
+		if link := r.Links[i]; link.XMLName.Space == "" {
+			return strings.TrimSpace(link.Data)
+		}
+	}
+	return ""
+}
+
+// feedURL returns the trimmed href of the first link element declared in
+// the Atom namespace, or "" when the feed has none.
+func (r *rssFeed) feedURL() string {
+	const atomNamespace = "http://www.w3.org/2005/Atom"
+
+	for i := range r.Links {
+		link := r.Links[i]
+		if link.XMLName.Space != atomNamespace {
+			continue
+		}
+		return strings.TrimSpace(link.Href)
+	}
+	return ""
+}
+
+// feedAuthor returns the feed-level author used for entries that have no
+// author of their own, with surrounding whitespace removed.
+//
+// Precedence: ManagingEditor, then Webmaster, then PodcastAuthor(). A
+// candidate that contains only whitespace is treated as absent so that it
+// cannot shadow a lower-priority value and yield an empty author.
+//
+// The receiver is a pointer, matching the other rssFeed methods and
+// avoiding a copy of the whole struct on every call.
+func (r *rssFeed) feedAuthor() string {
+	author := r.PodcastAuthor()
+	switch {
+	case strings.TrimSpace(r.ManagingEditor) != "":
+		author = r.ManagingEditor
+	case strings.TrimSpace(r.Webmaster) != "":
+		author = r.Webmaster
+	}
+	return strings.TrimSpace(author)
}
type rssLink struct {
@@ -48,6 +118,7 @@ type rssAuthor struct {
XMLName xml.Name
Data string `xml:",chardata"`
Name string `xml:"name"`
+ Email string `xml:"email"`
Inner string `xml:",innerxml"`
}
@@ -66,83 +137,37 @@ func (enclosure *rssEnclosure) Size() int64 {
}
type rssItem struct { // decoding target for one <item>; the struct tags name the child elements each field is read from
- GUID string `xml:"guid"`
- Title string `xml:"title"`
- Links []rssLink `xml:"link"`
- OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
- CommentLinks []rssCommentLink `xml:"comments"`
- Description string `xml:"description"`
- EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
- PubDate string `xml:"pubDate"`
- Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
- Authors []rssAuthor `xml:"author"`
- Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
- EnclosureLinks []rssEnclosure `xml:"enclosure"`
- OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+ GUID string `xml:"guid"` // item <guid>; first value tried by entryHash
+ Title string `xml:"title"` // item <title>; cleaned up by entryTitle
+ Links []rssLink `xml:"link"` // item <link> elements; entryURL iterates over them
+ Description string `xml:"description"` // item <description>; second choice of entryContent
+ PubDate string `xml:"pubDate"` // item <pubDate>; used by entryDate unless DublinCoreDate is set
+ Authors []rssAuthor `xml:"author"` // item <author> elements; entryAuthor tells them apart by XML namespace
+ CommentLinks []rssCommentLink `xml:"comments"` // item <comments> elements; see entryCommentsURL
+ EnclosureLinks []rssEnclosure `xml:"enclosure"` // item <enclosure> elements; see entryEnclosures
+ DublinCoreElement // embedded; presumably supplies DublinCoreDate, DublinCoreCreator and DublinCoreContent — declared outside this view, confirm
+ FeedBurnerElement // embedded; presumably supplies FeedBurnerLink and FeedBurnerEnclosureLink — declared outside this view, confirm
+ PodcastEntryElement // embedded; presumably supplies the PodcastDescription() that entryContent calls — declared outside this view, confirm
media.Element
}
-func (r *rssFeed) SiteURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "" {
- return strings.TrimSpace(element.Data)
- }
- }
-
- return ""
-}
-
-func (r *rssFeed) FeedURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
- return strings.TrimSpace(element.Href)
- }
- }
-
- return ""
-}
-
-func (r *rssFeed) Transform() *model.Feed {
- feed := new(model.Feed)
- feed.SiteURL = r.SiteURL()
- feed.FeedURL = r.FeedURL()
- feed.Title = strings.TrimSpace(r.Title)
-
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- for _, item := range r.Items {
- entry := item.Transform()
-
- if entry.Author == "" && r.ItunesAuthor != "" {
- entry.Author = r.ItunesAuthor
- }
- entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author))
-
- if entry.URL == "" {
- entry.URL = feed.SiteURL
- } else {
- entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
- if err == nil {
- entry.URL = entryURL
- }
- }
-
- if entry.Title == "" {
- entry.Title = entry.URL
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
+// Transform builds a model.Entry from the item by delegating every field
+// to the matching entry* helper. The helpers are invoked in the order the
+// fields are listed below.
+func (r *rssItem) Transform() *model.Entry {
+	return &model.Entry{
+		URL:         r.entryURL(),
+		CommentsURL: r.entryCommentsURL(),
+		Date:        r.entryDate(),
+		Author:      r.entryAuthor(),
+		Hash:        r.entryHash(),
+		Content:     r.entryContent(),
+		Title:       r.entryTitle(),
+		Enclosures:  r.entryEnclosures(),
+	}
}
-func (r *rssItem) PublishedDate() time.Time {
+func (r *rssItem) entryDate() time.Time {
value := r.PubDate
- if r.Date != "" {
- value = r.Date
+ if r.DublinCoreDate != "" {
+ value = r.DublinCoreDate
}
if value != "" {
@@ -158,22 +183,37 @@ func (r *rssItem) PublishedDate() time.Time {
return time.Now()
}
-func (r *rssItem) Author() string {
- for _, element := range r.Authors {
- if element.Name != "" {
- return element.Name
+func (r *rssItem) entryAuthor() string { // picks the entry author from the <author> elements, falling back to DublinCoreCreator; result is whitespace-trimmed
+ author := ""
+
+ for _, rssAuthor := range r.Authors { // every element is visited: a later element's assignment replaces an earlier one
+ switch rssAuthor.XMLName.Space {
+ case "http://www.itunes.com/dtds/podcast-1.0.dtd", "http://www.google.com/schemas/play-podcasts/1.0":
+ author = rssAuthor.Data // character data of the element; assigned even when empty, which discards an earlier value
+ case "http://www.w3.org/2005/Atom":
+ if rssAuthor.Name != "" { // Atom namespace: name first, then email; author is left unchanged when both are empty
+ author = rssAuthor.Name
+ } else if rssAuthor.Email != "" {
+ author = rssAuthor.Email
+ }
+ default:
+ if rssAuthor.Name != "" { // any other namespace (including none): <name> child if present
+ author = rssAuthor.Name
+ } else {
+ author = rssAuthor.Inner // otherwise the element's raw inner XML, assigned even when empty
+ }
}
+ }
- if element.Inner != "" {
- return element.Inner
- }
+ if author == "" { // no <author> element produced a value
+ author = r.DublinCoreCreator
}
- return r.Creator
+ return strings.TrimSpace(author)
}
-func (r *rssItem) Hash() string {
- for _, value := range []string{r.GUID, r.URL()} {
+func (r *rssItem) entryHash() string {
+ for _, value := range []string{r.GUID, r.entryURL()} {
if value != "" {
return crypto.Hash(value)
}
@@ -182,17 +222,22 @@ func (r *rssItem) Hash() string {
return ""
}
-func (r *rssItem) Content() string {
- if r.EncodedContent != "" {
- return r.EncodedContent
- }
+// entryTitle returns the item title after passing it through
+// sanitizer.StripTags and trimming surrounding whitespace.
+func (r *rssItem) entryTitle() string {
+	stripped := sanitizer.StripTags(r.Title)
+	return strings.TrimSpace(stripped)
+}
- return r.Description
+// entryContent returns the first non-empty value among DublinCoreContent,
+// Description and PodcastDescription(), in that order, or "" when all
+// three are empty.
+func (r *rssItem) entryContent() string {
+	// All three candidates are read up front, in priority order.
+	encoded, description, podcast := r.DublinCoreContent, r.Description, r.PodcastDescription()
+
+	switch {
+	case encoded != "":
+		return encoded
+	case description != "":
+		return description
+	default:
+		return podcast
+	}
}
-func (r *rssItem) URL() string {
- if r.OriginalLink != "" {
- return r.OriginalLink
+func (r *rssItem) entryURL() string {
+ if r.FeedBurnerLink != "" {
+ return r.FeedBurnerLink
}
for _, link := range r.Links {
@@ -208,7 +253,7 @@ func (r *rssItem) URL() string {
return ""
}
-func (r *rssItem) Enclosures() model.EnclosureList {
+func (r *rssItem) entryEnclosures() model.EnclosureList {
enclosures := make(model.EnclosureList, 0)
duplicates := make(map[string]bool, 0)
@@ -226,10 +271,10 @@ func (r *rssItem) Enclosures() model.EnclosureList {
for _, enclosure := range r.EnclosureLinks {
enclosureURL := enclosure.URL
- if r.OrigEnclosureLink != "" {
- filename := path.Base(r.OrigEnclosureLink)
+ if r.FeedBurnerEnclosureLink != "" {
+ filename := path.Base(r.FeedBurnerEnclosureLink)
if strings.Contains(enclosureURL, filename) {
- enclosureURL = r.OrigEnclosureLink
+ enclosureURL = r.FeedBurnerEnclosureLink
}
}
@@ -269,7 +314,7 @@ func (r *rssItem) Enclosures() model.EnclosureList {
return enclosures
}
-func (r *rssItem) CommentsURL() string {
+func (r *rssItem) entryCommentsURL() string {
for _, commentLink := range r.CommentLinks {
if commentLink.XMLName.Space == "" {
return strings.TrimSpace(commentLink.Data)
@@ -279,19 +324,6 @@ func (r *rssItem) CommentsURL() string {
return ""
}
-func (r *rssItem) Transform() *model.Entry {
- entry := new(model.Entry)
- entry.URL = r.URL()
- entry.CommentsURL = r.CommentsURL()
- entry.Date = r.PublishedDate()
- entry.Author = r.Author()
- entry.Hash = r.Hash()
- entry.Content = r.Content()
- entry.Title = strings.TrimSpace(r.Title)
- entry.Enclosures = r.Enclosures()
- return entry
-}
-
func isValidLinkRelation(rel string) bool {
switch rel {
case "", "alternate", "enclosure", "related", "self", "via":