diff options
Diffstat (limited to 'reader/atom/atom_10.go')
-rw-r--r-- | reader/atom/atom_10.go | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go new file mode 100644 index 0000000..099cbed --- /dev/null +++ b/reader/atom/atom_10.go @@ -0,0 +1,216 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "encoding/xml" + "html" + "strconv" + "strings" + "time" + + "miniflux.app/crypto" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/date" + "miniflux.app/reader/media" + "miniflux.app/reader/sanitizer" + "miniflux.app/url" +) + +// Specs: +// https://tools.ietf.org/html/rfc4287 +// https://validator.w3.org/feed/docs/atom.html +type atom10Feed struct { + XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Author atomPerson `xml:"author"` + Links atomLinks `xml:"link"` + Entries []atom10Entry `xml:"entry"` +} + +func (a *atom10Feed) Transform() *model.Feed { + feed := new(model.Feed) + feed.FeedURL = a.Links.firstLinkWithRelation("self") + feed.SiteURL = a.Links.originalLink() + feed.Title = a.Title.String() + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, entry := range a.Entries { + item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + + if item.Author == "" { + item.Author = a.Author.String() + } + + if item.Title == "" { + item.Title = item.URL + } + + feed.Entries = append(feed.Entries, item) + } + + return feed +} + +type atom10Entry struct { + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Published string `xml:"published"` + Updated string `xml:"updated"` + Links atomLinks `xml:"link"` + Summary atom10Text `xml:"summary"` + Content atom10Text `xml:"http://www.w3.org/2005/Atom content"` + Author atomPerson `xml:"author"` + media.Element +} + +func (a *atom10Entry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = a.Links.originalLink() + entry.Date = a.entryDate() + entry.Author = a.Author.String() + entry.Hash = a.entryHash() + entry.Content = a.entryContent() + entry.Title = a.entryTitle() + entry.Enclosures = a.entryEnclosures() + entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html") + return entry +} + +func (a *atom10Entry) entryTitle() string { + return sanitizer.StripTags(a.Title.String()) +} + +func (a *atom10Entry) entryContent() string { + content := a.Content.String() + if content != "" { + return content + } + + summary := a.Summary.String() + if summary != "" { + return summary + } + + mediaDescription := a.FirstMediaDescription() + if mediaDescription != "" { + return mediaDescription + } + + return "" +} + +// Note: The published date represents the original creation date for YouTube feeds. +// Example: +// <published>2019-01-26T08:02:28+00:00</published> +// <updated>2019-01-29T07:27:27+00:00</updated> +func (a *atom10Entry) entryDate() time.Time { + dateText := a.Published + if dateText == "" { + dateText = a.Updated + } + + if dateText != "" { + result, err := date.Parse(dateText) + if err != nil { + logger.Error("atom: %v", err) + return time.Now() + } + + return result + } + + return time.Now() +} + +func (a *atom10Entry) entryHash() string { + for _, value := range []string{a.ID, a.Links.originalLink()} { + if value != "" { + return crypto.Hash(value) + } + } + + return "" +} + +func (a *atom10Entry) entryEnclosures() model.EnclosureList { + enclosures := make(model.EnclosureList, 0) + duplicates := make(map[string]bool, 0) + + for _, mediaThumbnail := range a.AllMediaThumbnails() { + if _, found := duplicates[mediaThumbnail.URL]; !found { + duplicates[mediaThumbnail.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaThumbnail.URL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } + } + + for _, link := range a.Links { + if strings.ToLower(link.Rel) == "enclosure" { + if _, found := duplicates[link.URL]; !found { + duplicates[link.URL] = true + length, _ := strconv.ParseInt(link.Length, 10, 0) + enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) + } + } + } + + for _, mediaContent := range a.AllMediaContents() { + if _, found := duplicates[mediaContent.URL]; !found { + duplicates[mediaContent.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaContent.URL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } + } + + for _, mediaPeerLink := range a.AllMediaPeerLinks() { + if _, found := duplicates[mediaPeerLink.URL]; !found { + duplicates[mediaPeerLink.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaPeerLink.URL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } + } + + return enclosures +} + +type atom10Text struct { + Type string `xml:"type,attr"` + Data string `xml:",chardata"` + XML string `xml:",innerxml"` +} + +func (a *atom10Text) String() string { + content := "" + + switch { + case a.Type == "xhtml": + content = a.XML + case a.Type == "html": + content = a.Data + case a.Type == "text" || a.Type == "": + content = html.EscapeString(a.Data) + } + + return strings.TrimSpace(content) +} |