diff options
author | Frédéric Guillot <fred@miniflux.net> | 2017-11-22 14:52:31 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2017-11-22 14:52:31 -0800 |
commit | 2b641cc224c39487297d3e19b2dc7af316deda14 (patch) | |
tree | 7f30b391b8c1f163f8ec2a44e1edfe91a30c0cfc /reader | |
parent | 3b40ce49603e106a38a156b3749f5f612914cd5d (diff) |
Improve feed parsers
Diffstat (limited to 'reader')
-rw-r--r-- | reader/atom/atom.go | 17 | ||||
-rw-r--r-- | reader/json/json.go | 14 | ||||
-rw-r--r-- | reader/rdf/rdf.go | 5 | ||||
-rw-r--r-- | reader/rss/rss.go | 16 |
4 files changed, 27 insertions, 25 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go index 47c1df1..a2bd2c3 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -15,7 +15,6 @@ import ( "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" ) type atomFeed struct { @@ -64,7 +63,7 @@ func (a *atomFeed) Transform() *model.Feed { feed := new(model.Feed) feed.FeedURL = getRelationURL(a.Links, "self") feed.SiteURL = getURL(a.Links) - feed.Title = sanitizer.StripTags(a.Title) + feed.Title = strings.TrimSpace(a.Title) if feed.Title == "" { feed.Title = feed.SiteURL @@ -86,10 +85,10 @@ func (a *atomEntry) Transform() *model.Entry { entry := new(model.Entry) entry.URL = getURL(a.Links) entry.Date = getDate(a) - entry.Author = sanitizer.StripTags(getAuthor(a.Author)) + entry.Author = getAuthor(a.Author) entry.Hash = getHash(a) entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a)) - entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t")) + entry.Title = strings.TrimSpace(a.Title) entry.Enclosures = getEnclosures(a) if entry.Title == "" { @@ -102,11 +101,11 @@ func (a *atomEntry) Transform() *model.Entry { func getURL(links []atomLink) string { for _, link := range links { if strings.ToLower(link.Rel) == "alternate" { - return link.URL + return strings.TrimSpace(link.URL) } if link.Rel == "" && link.Type == "" { - return link.URL + return strings.TrimSpace(link.URL) } } @@ -116,7 +115,7 @@ func getURL(links []atomLink) string { func getRelationURL(links []atomLink, relation string) string { for _, link := range links { if strings.ToLower(link.Rel) == relation { - return link.URL + return strings.TrimSpace(link.URL) } } @@ -182,11 +181,11 @@ func getEnclosures(a *atomEntry) model.EnclosureList { func getAuthor(author atomAuthor) string { if author.Name != "" { - return author.Name + return strings.TrimSpace(author.Name) } if author.Email != "" { - return author.Email + return strings.TrimSpace(author.Email) } return "" diff --git a/reader/json/json.go b/reader/json/json.go index cd6a1c8..3401232 100644 --- a/reader/json/json.go +++ b/reader/json/json.go @@ -9,11 +9,12 @@ import ( "strings" "time" + "github.com/miniflux/miniflux2/reader/sanitizer" + "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" ) type jsonFeed struct { @@ -59,7 +60,7 @@ func (j *jsonFeed) Transform() *model.Feed { feed := new(model.Feed) feed.FeedURL = j.FeedURL feed.SiteURL = j.SiteURL - feed.Title = sanitizer.StripTags(j.Title) + feed.Title = strings.TrimSpace(j.Title) if feed.Title == "" { feed.Title = feed.SiteURL @@ -110,7 +111,7 @@ func (j *jsonItem) GetHash() string { func (j *jsonItem) GetTitle() string { for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} { if value != "" { - return truncate(value) + return truncate(sanitizer.StripTags(value)) } } @@ -145,17 +146,17 @@ func (j *jsonItem) Transform() *model.Entry { entry := new(model.Entry) entry.URL = j.URL entry.Date = j.GetDate() - entry.Author = sanitizer.StripTags(j.GetAuthor()) + entry.Author = j.GetAuthor() entry.Hash = j.GetHash() entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent()) - entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t")) + entry.Title = strings.TrimSpace(j.GetTitle()) entry.Enclosures = j.GetEnclosures() return entry } func getAuthor(author jsonAuthor) string { if author.Name != "" { - return author.Name + return strings.TrimSpace(author.Name) } return "" @@ -163,6 +164,7 @@ func getAuthor(author jsonAuthor) string { func truncate(str string) string { max := 100 + str = strings.TrimSpace(str) if len(str) > max { return str[:max] + "..." } diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index 8622d66..9b8ccdc 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -6,6 +6,7 @@ package rdf import ( "encoding/xml" + "strings" "time" "github.com/miniflux/miniflux2/helper" @@ -54,8 +55,8 @@ type rdfItem struct { func (r *rdfItem) Transform() *model.Entry { entry := new(model.Entry) - entry.Title = sanitizer.StripTags(r.Title) - entry.Author = sanitizer.StripTags(r.Creator) + entry.Title = strings.TrimSpace(r.Title) + entry.Author = strings.TrimSpace(r.Creator) entry.URL = r.Link entry.Content = processor.ItemContentProcessor(entry.URL, r.Description) entry.Hash = getHash(r) diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 2c63514..5573986 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -16,7 +16,6 @@ import ( "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" ) type rssFeed struct { @@ -68,7 +67,7 @@ type rssEnclosure struct { func (r *rssFeed) GetSiteURL() string { for _, element := range r.Links { if element.XMLName.Space == "" { - return element.Data + return strings.TrimSpace(element.Data) } } @@ -78,7 +77,7 @@ func (r *rssFeed) GetSiteURL() string { func (r *rssFeed) GetFeedURL() string { for _, element := range r.Links { if element.XMLName.Space == "http://www.w3.org/2005/Atom" { - return element.Href + return strings.TrimSpace(element.Href) } } @@ -89,7 +88,7 @@ func (r *rssFeed) Transform() *model.Feed { feed := new(model.Feed) feed.SiteURL = r.GetSiteURL() feed.FeedURL = r.GetFeedURL() - feed.Title = sanitizer.StripTags(r.Title) + feed.Title = strings.TrimSpace(r.Title) if feed.Title == "" { feed.Title = feed.SiteURL @@ -101,7 +100,7 @@ func (r *rssFeed) Transform() *model.Feed { if entry.Author == "" && r.ItunesAuthor != "" { entry.Author = r.ItunesAuthor } - entry.Author = sanitizer.StripTags(entry.Author) + entry.Author = strings.TrimSpace(entry.Author) if entry.URL == "" { entry.URL = feed.SiteURL @@ -112,6 +111,7 @@ func (r *rssFeed) Transform() *model.Feed { return feed } + func (r *rssItem) GetDate() time.Time { value := r.PubDate if r.Date != "" { @@ -170,11 +170,11 @@ func (r *rssItem) GetURL() string { for _, link := range r.Links { if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) { - return link.Href + return strings.TrimSpace(link.Href) } if link.Data != "" { - return link.Data + return strings.TrimSpace(link.Data) } } @@ -212,7 +212,7 @@ func (r *rssItem) Transform() *model.Entry { entry.Author = r.GetAuthor() entry.Hash = r.GetHash() entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent()) - entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t")) + entry.Title = strings.TrimSpace(r.Title) entry.Enclosures = r.GetEnclosures() if entry.Title == "" { |