about summary refs log tree commit diff homepage
path: root/reader/rss/rss.go
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
commit d5838b67340ca83dcc32f6d1775c183188ec5e7a (patch)
tree a71705463458e5d2a6f74b0323119c3376f89b97 /reader/rss/rss.go
parent c26787f47667f69a2d3e85db01fe20b9bc70bc9a (diff)
Move feed parsers packages in reader package
Diffstat (limited to 'reader/rss/rss.go')
-rw-r--r-- reader/rss/rss.go 235
1 files changed, 235 insertions, 0 deletions
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
new file mode 100644
index 0000000..2c63514
--- /dev/null
+++ b/reader/rss/rss.go
@@ -0,0 +1,235 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rss
+
+import (
+ "encoding/xml"
+ "log"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/date"
+ "github.com/miniflux/miniflux2/reader/processor"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+)
+
+// rssFeed is the decoding target for an <rss> document. Channel-level
+// values are reached through "channel>..." tag paths, so the <channel>
+// element has no struct of its own.
+type rssFeed struct {
+ XMLName xml.Name `xml:"rss"`
+ Version string `xml:"version,attr"`
+ Title string `xml:"channel>title"`
+ // Links holds every channel-level <link> element; GetSiteURL and
+ // GetFeedURL tell them apart by XML namespace.
+ Links []rssLink `xml:"channel>link"`
+ Language string `xml:"channel>language"`
+ Description string `xml:"channel>description"`
+ PubDate string `xml:"channel>pubDate"`
+ // ItunesAuthor is the channel author in the iTunes podcast namespace;
+ // Transform uses it for entries that have no author of their own.
+ ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
+ Items []rssItem `xml:"channel>item"`
+}
+
+// rssLink represents a <link> element. The link target is read either from
+// the element text (Data) or from the href attribute (Href), depending on
+// the element's namespace as checked by the accessor methods.
+type rssLink struct {
+ // XMLName records the element name and namespace; callers compare
+ // XMLName.Space against the Atom namespace.
+ XMLName xml.Name
+ // Data is the character data inside the element.
+ Data string `xml:",chardata"`
+ Href string `xml:"href,attr"`
+ Rel string `xml:"rel,attr"`
+}
+
+// rssItem is the decoding target for one <item> element of the channel.
+// Besides the plain RSS children it captures FeedBurner, content:encoded
+// and Dublin Core extension elements through namespace-qualified tags.
+type rssItem struct {
+ GUID string `xml:"guid"`
+ Title string `xml:"title"`
+ // Links holds the item's <link> elements; GetURL inspects each one's
+ // namespace, href and rel to pick the entry URL.
+ Links []rssLink `xml:"link"`
+ // OriginalLink is the FeedBurner origLink value; GetURL prefers it
+ // over every <link> element.
+ OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+ Description string `xml:"description"`
+ // Content is the content:encoded value; GetContent prefers it over
+ // Description.
+ Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
+ PubDate string `xml:"pubDate"`
+ // Date is the Dublin Core date; GetDate prefers it over PubDate.
+ Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
+ Authors []rssAuthor `xml:"author"`
+ // Creator is the Dublin Core creator; GetAuthor falls back to it.
+ Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
+ Enclosures []rssEnclosure `xml:"enclosure"`
+ // OrigEnclosureLink is the FeedBurner origEnclosureLink value, used by
+ // GetEnclosures to substitute a matching enclosure URL.
+ OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+}
+
+// rssAuthor represents an <author> element. The author can be given either
+// as a nested <name> child (Name) or as the element's own text (Data);
+// GetAuthor checks Name first.
+type rssAuthor struct {
+ // XMLName records the element name and namespace as decoded.
+ XMLName xml.Name
+ Data string `xml:",chardata"`
+ Name string `xml:"name"`
+}
+
+// rssEnclosure holds the attributes of an <enclosure> element.
+type rssEnclosure struct {
+ URL string `xml:"url,attr"`
+ Type string `xml:"type,attr"`
+ // Length is kept as the raw attribute text; GetEnclosures converts it
+ // to an integer.
+ Length string `xml:"length,attr"`
+}
+
+// GetSiteURL returns the website URL declared by the channel.
+//
+// The value is the text of the first channel-level <link> element that has
+// no XML namespace. Surrounding whitespace is removed so that padded or
+// whitespace-only link text (common in pretty-printed feeds) does not end up
+// in the URL or defeat callers' empty-string checks. It returns an empty
+// string when no such element exists.
+func (r *rssFeed) GetSiteURL() string {
+	for _, element := range r.Links {
+		if element.XMLName.Space == "" {
+			return strings.TrimSpace(element.Data)
+		}
+	}
+
+	return ""
+}
+
+func (r *rssFeed) GetFeedURL() string {
+ for _, element := range r.Links {
+ if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
+ return element.Href
+ }
+ }
+
+ return ""
+}
+
+// Transform converts the decoded RSS document into a model.Feed.
+//
+// The feed title has its tags stripped and defaults to the site URL when
+// empty. Each item is converted with rssItem.Transform; an entry without an
+// author inherits the channel's iTunes author, and an entry without a URL
+// inherits the site URL.
+func (r *rssFeed) Transform() *model.Feed {
+	feed := &model.Feed{
+		SiteURL: r.GetSiteURL(),
+		FeedURL: r.GetFeedURL(),
+		Title:   sanitizer.StripTags(r.Title),
+	}
+	if feed.Title == "" {
+		feed.Title = feed.SiteURL
+	}
+
+	for i := range r.Items {
+		entry := r.Items[i].Transform()
+
+		author := entry.Author
+		if author == "" {
+			author = r.ItunesAuthor
+		}
+		entry.Author = sanitizer.StripTags(author)
+
+		if entry.URL == "" {
+			entry.URL = feed.SiteURL
+		}
+
+		feed.Entries = append(feed.Entries, entry)
+	}
+
+	return feed
+}
+// GetDate returns the item's publication time.
+//
+// The Dublin Core date is used when present, otherwise pubDate. When neither
+// is set, or when date.Parse rejects the value (the error is logged), the
+// current time is returned instead.
+func (r *rssItem) GetDate() time.Time {
+	raw := r.Date
+	if raw == "" {
+		raw = r.PubDate
+	}
+
+	if raw == "" {
+		return time.Now()
+	}
+
+	parsed, err := date.Parse(raw)
+	if err != nil {
+		log.Println(err)
+		return time.Now()
+	}
+
+	return parsed
+}
+
+// GetAuthor returns the item's author.
+//
+// The <author> elements are scanned in order and the first non-empty value
+// wins, with a nested name taking precedence over the element text. When no
+// author element yields a value, the Dublin Core creator is returned (which
+// may itself be empty).
+func (r *rssItem) GetAuthor() string {
+	for _, author := range r.Authors {
+		switch {
+		case author.Name != "":
+			return author.Name
+		case author.Data != "":
+			return author.Data
+		}
+	}
+
+	return r.Creator
+}
+
+// GetHash returns helper.Hash of the item's GUID or, when the GUID is
+// empty, of the URL reported by GetURL. It returns an empty string when
+// both are empty.
+func (r *rssItem) GetHash() string {
+	if r.GUID != "" {
+		return helper.Hash(r.GUID)
+	}
+
+	if link := r.GetURL(); link != "" {
+		return helper.Hash(link)
+	}
+
+	return ""
+}
+
+// GetContent returns the item body: the content:encoded value when it is
+// non-empty, otherwise the description.
+func (r *rssItem) GetContent() string {
+	body := r.Description
+	if r.Content != "" {
+		body = r.Content
+	}
+
+	return body
+}
+
+// GetURL returns the URL of the item.
+//
+// Resolution order:
+//  1. the FeedBurner original link, when set;
+//  2. for each <link> element in document order, the href of an Atom link
+//     with an accepted relation (see isValidLinkRelation), or else the
+//     element's text content.
+//
+// Link text is trimmed before use so that padded or whitespace-only content
+// is not returned as a URL; a whitespace-only link is skipped and the scan
+// continues. It returns an empty string when nothing usable is found, which
+// lets callers apply their own fallback.
+func (r *rssItem) GetURL() string {
+	if r.OriginalLink != "" {
+		return r.OriginalLink
+	}
+
+	for _, link := range r.Links {
+		if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
+			return link.Href
+		}
+
+		if data := strings.TrimSpace(link.Data); data != "" {
+			return data
+		}
+	}
+
+	return ""
+}
+
+// GetEnclosures converts the item's <enclosure> elements into a
+// model.EnclosureList. The returned list is never nil.
+//
+// When the item carries a FeedBurner original enclosure link, any enclosure
+// whose URL contains that link's base file name has its URL replaced by the
+// original link.
+func (r *rssItem) GetEnclosures() model.EnclosureList {
+	enclosures := make(model.EnclosureList, 0, len(r.Enclosures))
+
+	// The base name depends only on the item, so compute it once rather
+	// than once per enclosure. path.Base never returns "" for a non-empty
+	// input, so an empty value here means "no original link".
+	originalName := ""
+	if r.OrigEnclosureLink != "" {
+		originalName = path.Base(r.OrigEnclosureLink)
+	}
+
+	for _, enclosure := range r.Enclosures {
+		// Best effort: a missing or malformed length attribute is
+		// recorded as size 0 instead of rejecting the enclosure.
+		length, _ := strconv.Atoi(enclosure.Length)
+
+		enclosureURL := enclosure.URL
+		if originalName != "" && strings.Contains(enclosureURL, originalName) {
+			enclosureURL = r.OrigEnclosureLink
+		}
+
+		enclosures = append(enclosures, &model.Enclosure{
+			URL:      enclosureURL,
+			MimeType: enclosure.Type,
+			Size:     length,
+		})
+	}
+
+	return enclosures
+}
+
+// Transform converts the decoded item into a model.Entry.
+//
+// The content is passed through processor.ItemContentProcessor together
+// with the entry URL. The title is trimmed of spaces, newlines and tabs,
+// stripped of tags, and defaults to the entry URL when empty.
+func (r *rssItem) Transform() *model.Entry {
+	link := r.GetURL()
+
+	// Field expressions are evaluated top to bottom, which keeps the
+	// accessor calls in a fixed, predictable order.
+	entry := &model.Entry{
+		URL:        link,
+		Date:       r.GetDate(),
+		Author:     r.GetAuthor(),
+		Hash:       r.GetHash(),
+		Content:    processor.ItemContentProcessor(link, r.GetContent()),
+		Title:      sanitizer.StripTags(strings.Trim(r.Title, " \n\t")),
+		Enclosures: r.GetEnclosures(),
+	}
+
+	if entry.Title == "" {
+		entry.Title = entry.URL
+	}
+
+	return entry
+}
+
+// isValidLinkRelation reports whether rel is an accepted link relation: an
+// empty value, one of "alternate", "enclosure", "related", "self" or "via",
+// or any value starting with "http". The comparison is case-sensitive.
+func isValidLinkRelation(rel string) bool {
+	switch rel {
+	case "", "alternate", "enclosure", "related", "self", "via":
+		return true
+	}
+
+	return strings.HasPrefix(rel, "http")
+}