about | summary | refs | log | tree | commit | diff | homepage
path: root/reader
diff options
context:
space:
mode:
author: Frédéric Guillot <fred@miniflux.net> 2017-11-22 14:52:31 -0800
committer: Frédéric Guillot <fred@miniflux.net> 2017-11-22 14:52:31 -0800
commit: 2b641cc224c39487297d3e19b2dc7af316deda14 (patch)
tree: 7f30b391b8c1f163f8ec2a44e1edfe91a30c0cfc /reader
parent: 3b40ce49603e106a38a156b3749f5f612914cd5d (diff)
Improve feed parsers
Diffstat (limited to 'reader')
-rw-r--r-- reader/atom/atom.go 17
-rw-r--r-- reader/json/json.go 14
-rw-r--r-- reader/rdf/rdf.go 5
-rw-r--r-- reader/rss/rss.go 16
4 files changed, 27 insertions, 25 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go
index 47c1df1..a2bd2c3 100644
--- a/reader/atom/atom.go
+++ b/reader/atom/atom.go
@@ -15,7 +15,6 @@ import (
"github.com/miniflux/miniflux2/model"
"github.com/miniflux/miniflux2/reader/date"
"github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
)
type atomFeed struct {
@@ -64,7 +63,7 @@ func (a *atomFeed) Transform() *model.Feed {
feed := new(model.Feed)
feed.FeedURL = getRelationURL(a.Links, "self")
feed.SiteURL = getURL(a.Links)
- feed.Title = sanitizer.StripTags(a.Title)
+ feed.Title = strings.TrimSpace(a.Title)
if feed.Title == "" {
feed.Title = feed.SiteURL
@@ -86,10 +85,10 @@ func (a *atomEntry) Transform() *model.Entry {
entry := new(model.Entry)
entry.URL = getURL(a.Links)
entry.Date = getDate(a)
- entry.Author = sanitizer.StripTags(getAuthor(a.Author))
+ entry.Author = getAuthor(a.Author)
entry.Hash = getHash(a)
entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a))
- entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t"))
+ entry.Title = strings.TrimSpace(a.Title)
entry.Enclosures = getEnclosures(a)
if entry.Title == "" {
@@ -102,11 +101,11 @@ func (a *atomEntry) Transform() *model.Entry {
func getURL(links []atomLink) string {
for _, link := range links {
if strings.ToLower(link.Rel) == "alternate" {
- return link.URL
+ return strings.TrimSpace(link.URL)
}
if link.Rel == "" && link.Type == "" {
- return link.URL
+ return strings.TrimSpace(link.URL)
}
}
@@ -116,7 +115,7 @@ func getURL(links []atomLink) string {
func getRelationURL(links []atomLink, relation string) string {
for _, link := range links {
if strings.ToLower(link.Rel) == relation {
- return link.URL
+ return strings.TrimSpace(link.URL)
}
}
@@ -182,11 +181,11 @@ func getEnclosures(a *atomEntry) model.EnclosureList {
func getAuthor(author atomAuthor) string {
if author.Name != "" {
- return author.Name
+ return strings.TrimSpace(author.Name)
}
if author.Email != "" {
- return author.Email
+ return strings.TrimSpace(author.Email)
}
return ""
diff --git a/reader/json/json.go b/reader/json/json.go
index cd6a1c8..3401232 100644
--- a/reader/json/json.go
+++ b/reader/json/json.go
@@ -9,11 +9,12 @@ import (
"strings"
"time"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+
"github.com/miniflux/miniflux2/helper"
"github.com/miniflux/miniflux2/model"
"github.com/miniflux/miniflux2/reader/date"
"github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
)
type jsonFeed struct {
@@ -59,7 +60,7 @@ func (j *jsonFeed) Transform() *model.Feed {
feed := new(model.Feed)
feed.FeedURL = j.FeedURL
feed.SiteURL = j.SiteURL
- feed.Title = sanitizer.StripTags(j.Title)
+ feed.Title = strings.TrimSpace(j.Title)
if feed.Title == "" {
feed.Title = feed.SiteURL
@@ -110,7 +111,7 @@ func (j *jsonItem) GetHash() string {
func (j *jsonItem) GetTitle() string {
for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} {
if value != "" {
- return truncate(value)
+ return truncate(sanitizer.StripTags(value))
}
}
@@ -145,17 +146,17 @@ func (j *jsonItem) Transform() *model.Entry {
entry := new(model.Entry)
entry.URL = j.URL
entry.Date = j.GetDate()
- entry.Author = sanitizer.StripTags(j.GetAuthor())
+ entry.Author = j.GetAuthor()
entry.Hash = j.GetHash()
entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent())
- entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t"))
+ entry.Title = strings.TrimSpace(j.GetTitle())
entry.Enclosures = j.GetEnclosures()
return entry
}
func getAuthor(author jsonAuthor) string {
if author.Name != "" {
- return author.Name
+ return strings.TrimSpace(author.Name)
}
return ""
@@ -163,6 +164,7 @@ func getAuthor(author jsonAuthor) string {
func truncate(str string) string {
max := 100
+ str = strings.TrimSpace(str)
if len(str) > max {
return str[:max] + "..."
}
diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go
index 8622d66..9b8ccdc 100644
--- a/reader/rdf/rdf.go
+++ b/reader/rdf/rdf.go
@@ -6,6 +6,7 @@ package rdf
import (
"encoding/xml"
+ "strings"
"time"
"github.com/miniflux/miniflux2/helper"
@@ -54,8 +55,8 @@ type rdfItem struct {
func (r *rdfItem) Transform() *model.Entry {
entry := new(model.Entry)
- entry.Title = sanitizer.StripTags(r.Title)
- entry.Author = sanitizer.StripTags(r.Creator)
+ entry.Title = strings.TrimSpace(r.Title)
+ entry.Author = strings.TrimSpace(r.Creator)
entry.URL = r.Link
entry.Content = processor.ItemContentProcessor(entry.URL, r.Description)
entry.Hash = getHash(r)
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index 2c63514..5573986 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -16,7 +16,6 @@ import (
"github.com/miniflux/miniflux2/model"
"github.com/miniflux/miniflux2/reader/date"
"github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
)
type rssFeed struct {
@@ -68,7 +67,7 @@ type rssEnclosure struct {
func (r *rssFeed) GetSiteURL() string {
for _, element := range r.Links {
if element.XMLName.Space == "" {
- return element.Data
+ return strings.TrimSpace(element.Data)
}
}
@@ -78,7 +77,7 @@ func (r *rssFeed) GetSiteURL() string {
func (r *rssFeed) GetFeedURL() string {
for _, element := range r.Links {
if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
- return element.Href
+ return strings.TrimSpace(element.Href)
}
}
@@ -89,7 +88,7 @@ func (r *rssFeed) Transform() *model.Feed {
feed := new(model.Feed)
feed.SiteURL = r.GetSiteURL()
feed.FeedURL = r.GetFeedURL()
- feed.Title = sanitizer.StripTags(r.Title)
+ feed.Title = strings.TrimSpace(r.Title)
if feed.Title == "" {
feed.Title = feed.SiteURL
@@ -101,7 +100,7 @@ func (r *rssFeed) Transform() *model.Feed {
if entry.Author == "" && r.ItunesAuthor != "" {
entry.Author = r.ItunesAuthor
}
- entry.Author = sanitizer.StripTags(entry.Author)
+ entry.Author = strings.TrimSpace(entry.Author)
if entry.URL == "" {
entry.URL = feed.SiteURL
@@ -112,6 +111,7 @@ func (r *rssFeed) Transform() *model.Feed {
return feed
}
+
func (r *rssItem) GetDate() time.Time {
value := r.PubDate
if r.Date != "" {
@@ -170,11 +170,11 @@ func (r *rssItem) GetURL() string {
for _, link := range r.Links {
if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
- return link.Href
+ return strings.TrimSpace(link.Href)
}
if link.Data != "" {
- return link.Data
+ return strings.TrimSpace(link.Data)
}
}
@@ -212,7 +212,7 @@ func (r *rssItem) Transform() *model.Entry {
entry.Author = r.GetAuthor()
entry.Hash = r.GetHash()
entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
- entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t"))
+ entry.Title = strings.TrimSpace(r.Title)
entry.Enclosures = r.GetEnclosures()
if entry.Title == "" {