From 6618caca81a1fc0742034937e3a8c3f503d41852 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Mon, 20 Nov 2017 18:50:16 -0800 Subject: Use more idiomatic code for Atom parser --- reader/feed/atom/atom.go | 189 ++++++++++++++++++++------------------------- reader/feed/atom/parser.go | 2 +- 2 files changed, 85 insertions(+), 106 deletions(-) (limited to 'reader/feed') diff --git a/reader/feed/atom/atom.go b/reader/feed/atom/atom.go index db6172f..2035a4e 100644 --- a/reader/feed/atom/atom.go +++ b/reader/feed/atom/atom.go @@ -6,87 +6,64 @@ package atom import ( "encoding/xml" + "log" + "strconv" + "strings" + "time" + "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/feed/date" "github.com/miniflux/miniflux2/reader/processor" "github.com/miniflux/miniflux2/reader/sanitizer" - "log" - "strconv" - "strings" - "time" ) -type AtomFeed struct { +type atomFeed struct { XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` ID string `xml:"id"` Title string `xml:"title"` - Author Author `xml:"author"` - Links []Link `xml:"link"` - Entries []AtomEntry `xml:"entry"` + Author atomAuthor `xml:"author"` + Links []atomLink `xml:"link"` + Entries []atomEntry `xml:"entry"` } -type AtomEntry struct { - ID string `xml:"id"` - Title string `xml:"title"` - Updated string `xml:"updated"` - Links []Link `xml:"link"` - Summary string `xml:"summary"` - Content Content `xml:"content"` - MediaGroup MediaGroup `xml:"http://search.yahoo.com/mrss/ group"` - Author Author `xml:"author"` +type atomEntry struct { + ID string `xml:"id"` + Title string `xml:"title"` + Updated string `xml:"updated"` + Links []atomLink `xml:"link"` + Summary string `xml:"summary"` + Content atomContent `xml:"content"` + MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"` + Author atomAuthor `xml:"author"` } -type Author struct { +type atomAuthor struct { Name string `xml:"name"` Email string `xml:"email"` } -type Link struct { - Url string `xml:"href,attr"` +type atomLink struct { + URL string `xml:"href,attr"` Type string `xml:"type,attr"` Rel string `xml:"rel,attr"` Length string `xml:"length,attr"` } -type Content struct { +type atomContent struct { Type string `xml:"type,attr"` Data string `xml:",chardata"` - Xml string `xml:",innerxml"` + XML string `xml:",innerxml"` } -type MediaGroup struct { +type atomMediaGroup struct { Description string `xml:"http://search.yahoo.com/mrss/ description"` } -func (a *AtomFeed) getSiteURL() string { - for _, link := range a.Links { - if strings.ToLower(link.Rel) == "alternate" { - return link.Url - } - - if link.Rel == "" && link.Type == "" { - return link.Url - } - } - - return "" -} - -func (a *AtomFeed) getFeedURL() string { - for _, link := range a.Links { - if strings.ToLower(link.Rel) == "self" { - return link.Url - } - } - - return "" -} - -func (a *AtomFeed) Transform() *model.Feed { +func (a *atomFeed) Transform() *model.Feed { feed := new(model.Feed) - feed.FeedURL = a.getFeedURL() - feed.SiteURL = a.getSiteURL() + feed.FeedURL = getRelationURL(a.Links, "self") + feed.SiteURL = getURL(a.Links) feed.Title = sanitizer.StripTags(a.Title) if feed.Title == "" { @@ -96,7 +73,7 @@ func (a *AtomFeed) Transform() *model.Feed { for _, entry := range a.Entries { item := entry.Transform() if item.Author == "" { - item.Author = a.GetAuthor() + item.Author = getAuthor(a.Author) } feed.Entries = append(feed.Entries, item) @@ -105,103 +82,105 @@ func (a *AtomFeed) Transform() *model.Feed { return feed } -func (a *AtomFeed) GetAuthor() string { - return getAuthor(a.Author) -} - -func (e *AtomEntry) GetDate() time.Time { - if e.Updated != "" { - result, err := date.Parse(e.Updated) - if err != nil { - log.Println(err) - return time.Now() - } +func (a *atomEntry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = getURL(a.Links) + entry.Date = getDate(a) + entry.Author = sanitizer.StripTags(getAuthor(a.Author)) + entry.Hash = getHash(a) + entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a)) + entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t")) + entry.Enclosures = getEnclosures(a) - return result + if entry.Title == "" { + entry.Title = entry.URL } - return time.Now() + return entry } -func (e *AtomEntry) GetURL() string { - for _, link := range e.Links { +func getURL(links []atomLink) string { + for _, link := range links { if strings.ToLower(link.Rel) == "alternate" { - return link.Url + return link.URL } if link.Rel == "" && link.Type == "" { - return link.Url + return link.URL } } return "" } -func (e *AtomEntry) GetAuthor() string { - return getAuthor(e.Author) +func getRelationURL(links []atomLink, relation string) string { + for _, link := range links { + if strings.ToLower(link.Rel) == relation { + return link.URL + } + } + + return "" } -func (e *AtomEntry) GetHash() string { - for _, value := range []string{e.ID, e.GetURL()} { - if value != "" { - return helper.Hash(value) +func getDate(a *atomEntry) time.Time { + if a.Updated != "" { + result, err := date.Parse(a.Updated) + if err != nil { + log.Println(err) + return time.Now() } + + return result } - return "" + return time.Now() } -func (e *AtomEntry) GetContent() string { - if e.Content.Type == "html" || e.Content.Type == "text" { - return e.Content.Data +func getContent(a *atomEntry) string { + if a.Content.Type == "html" || a.Content.Type == "text" { + return a.Content.Data } - if e.Content.Type == "xhtml" { - return e.Content.Xml + if a.Content.Type == "xhtml" { + return a.Content.XML } - if e.Summary != "" { - return e.Summary + if a.Summary != "" { + return a.Summary } - if e.MediaGroup.Description != "" { - return e.MediaGroup.Description + if a.MediaGroup.Description != "" { + return a.MediaGroup.Description } return "" } -func (e *AtomEntry) GetEnclosures() model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - - for _, link := range e.Links { - if strings.ToLower(link.Rel) == "enclosure" { - length, _ := strconv.Atoi(link.Length) - enclosures = append(enclosures, &model.Enclosure{URL: link.Url, MimeType: link.Type, Size: length}) +func getHash(a *atomEntry) string { + for _, value := range []string{a.ID, getURL(a.Links)} { + if value != "" { + return helper.Hash(value) } } - return enclosures + return "" } -func (e *AtomEntry) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = e.GetURL() - entry.Date = e.GetDate() - entry.Author = sanitizer.StripTags(e.GetAuthor()) - entry.Hash = e.GetHash() - entry.Content = processor.ItemContentProcessor(entry.URL, e.GetContent()) - entry.Title = sanitizer.StripTags(strings.Trim(e.Title, " \n\t")) - entry.Enclosures = e.GetEnclosures() +func getEnclosures(a *atomEntry) model.EnclosureList { + enclosures := make(model.EnclosureList, 0) - if entry.Title == "" { - entry.Title = entry.URL + for _, link := range a.Links { + if strings.ToLower(link.Rel) == "enclosure" { + length, _ := strconv.Atoi(link.Length) + enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) + } } - return entry + return enclosures } -func getAuthor(author Author) string { +func getAuthor(author atomAuthor) string { if author.Name != "" { return author.Name } diff --git a/reader/feed/atom/parser.go b/reader/feed/atom/parser.go index ec0d6b4..04bfd04 100644 --- a/reader/feed/atom/parser.go +++ b/reader/feed/atom/parser.go @@ -16,7 +16,7 @@ import ( // Parse returns a normalized feed struct from a Atom feed. func Parse(data io.Reader) (*model.Feed, error) { - atomFeed := new(AtomFeed) + atomFeed := new(atomFeed) decoder := xml.NewDecoder(data) decoder.CharsetReader = charset.NewReaderLabel -- cgit v1.2.3