diff options
author | Frédéric Guillot <fred@miniflux.net> | 2019-12-22 22:18:21 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2019-12-22 22:42:00 -0800 |
commit | 33fdb2c489727f8af972d69506fef02c121c4fd0 (patch) | |
tree | 0cf62c594f3fb7827b28e57f41483e927d74b740 | |
parent | cfb6ddfcea3e387a7141d4c65099d2f08bc1732b (diff) |
Add support for Atom 0.3
-rw-r--r-- | reader/atom/atom.go | 273 | ||||
-rw-r--r-- | reader/atom/atom_03.go | 163 | ||||
-rw-r--r-- | reader/atom/atom_03_test.go | 219 | ||||
-rw-r--r-- | reader/atom/atom_10.go | 216 | ||||
-rw-r--r-- | reader/atom/atom_10_test.go | 779 | ||||
-rw-r--r-- | reader/atom/atom_common.go | 68 | ||||
-rw-r--r-- | reader/atom/parser.go | 49 | ||||
-rw-r--r-- | reader/atom/parser_test.go | 759 | ||||
-rw-r--r-- | reader/parser/format_test.go | 11 |
9 files changed, 1519 insertions, 1018 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go deleted file mode 100644 index 677f69a..0000000 --- a/reader/atom/atom.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package atom // import "miniflux.app/reader/atom" - -import ( - "encoding/xml" - "html" - "strconv" - "strings" - "time" - - "miniflux.app/crypto" - "miniflux.app/logger" - "miniflux.app/model" - "miniflux.app/reader/date" - "miniflux.app/reader/media" - "miniflux.app/reader/sanitizer" - "miniflux.app/url" -) - -type atomFeed struct { - XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` - ID string `xml:"id"` - Title string `xml:"title"` - Author atomAuthor `xml:"author"` - Links []atomLink `xml:"link"` - Entries []atomEntry `xml:"entry"` -} - -type atomEntry struct { - ID string `xml:"id"` - Title atomContent `xml:"title"` - Published string `xml:"published"` - Updated string `xml:"updated"` - Links []atomLink `xml:"link"` - Summary atomContent `xml:"summary"` - Content atomContent `xml:"http://www.w3.org/2005/Atom content"` - Author atomAuthor `xml:"author"` - media.Element -} - -type atomAuthor struct { - Name string `xml:"name"` - Email string `xml:"email"` -} - -type atomLink struct { - URL string `xml:"href,attr"` - Type string `xml:"type,attr"` - Rel string `xml:"rel,attr"` - Length string `xml:"length,attr"` -} - -type atomContent struct { - Type string `xml:"type,attr"` - Data string `xml:",chardata"` - XML string `xml:",innerxml"` -} - -func (a *atomFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.FeedURL = getRelationURL(a.Links, "self") - feed.SiteURL = getURL(a.Links) - feed.Title = strings.TrimSpace(a.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, entry := range a.Entries { - item := entry.Transform() - entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) - if err == nil { - item.URL = entryURL - } - - if item.Author == "" { - item.Author = getAuthor(a.Author) - } - - if item.Title == "" { - item.Title = item.URL - } - - feed.Entries = append(feed.Entries, item) - } - - return feed -} - -func (a *atomEntry) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = getURL(a.Links) - entry.Date = getDate(a) - entry.Author = getAuthor(a.Author) - entry.Hash = getHash(a) - entry.Content = getContent(a) - entry.Title = getTitle(a) - entry.Enclosures = getEnclosures(a) - entry.CommentsURL = getRelationURLWithType(a.Links, "replies", "text/html") - return entry -} - -func getURL(links []atomLink) string { - for _, link := range links { - if strings.ToLower(link.Rel) == "alternate" { - return strings.TrimSpace(link.URL) - } - - if link.Rel == "" && link.Type == "" { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getRelationURL(links []atomLink, relation string) string { - for _, link := range links { - if strings.ToLower(link.Rel) == relation { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getRelationURLWithType(links []atomLink, relation, contentType string) string { - for _, link := range links { - if strings.ToLower(link.Rel) == relation && strings.ToLower(link.Type) == contentType { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getDate(a *atomEntry) time.Time { - // Note: The published date represents the original creation date for YouTube feeds. - // Example: - // <published>2019-01-26T08:02:28+00:00</published> - // <updated>2019-01-29T07:27:27+00:00</updated> - dateText := a.Published - if dateText == "" { - dateText = a.Updated - } - - if dateText != "" { - result, err := date.Parse(dateText) - if err != nil { - logger.Error("atom: %v", err) - return time.Now() - } - - return result - } - - return time.Now() -} - -func atomContentToString(c atomContent) string { - if c.Type == "xhtml" { - return c.XML - } - - if c.Type == "html" { - return c.Data - } - - if c.Type == "text" || c.Type == "" { - return html.EscapeString(c.Data) - } - - return "" -} - -func getContent(a *atomEntry) string { - r := atomContentToString(a.Content) - if r != "" { - return r - } - - r = atomContentToString(a.Summary) - if r != "" { - return r - } - - mediaDescription := a.FirstMediaDescription() - if mediaDescription != "" { - return mediaDescription - } - - return "" -} - -func getTitle(a *atomEntry) string { - title := atomContentToString(a.Title) - return strings.TrimSpace(sanitizer.StripTags(title)) -} - -func getHash(a *atomEntry) string { - for _, value := range []string{a.ID, getURL(a.Links)} { - if value != "" { - return crypto.Hash(value) - } - } - - return "" -} - -func getEnclosures(a *atomEntry) model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - duplicates := make(map[string]bool, 0) - - for _, mediaThumbnail := range a.AllMediaThumbnails() { - if _, found := duplicates[mediaThumbnail.URL]; !found { - duplicates[mediaThumbnail.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaThumbnail.URL, - MimeType: mediaThumbnail.MimeType(), - Size: mediaThumbnail.Size(), - }) - } - } - - for _, link := range a.Links { - if strings.ToLower(link.Rel) == "enclosure" { - if _, found := duplicates[link.URL]; !found { - duplicates[link.URL] = true - length, _ := strconv.ParseInt(link.Length, 10, 0) - enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) - } - } - } - - for _, mediaContent := range a.AllMediaContents() { - if _, found := duplicates[mediaContent.URL]; !found { - duplicates[mediaContent.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaContent.URL, - MimeType: mediaContent.MimeType(), - Size: mediaContent.Size(), - }) - } - } - - for _, mediaPeerLink := range a.AllMediaPeerLinks() { - if _, found := duplicates[mediaPeerLink.URL]; !found { - duplicates[mediaPeerLink.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaPeerLink.URL, - MimeType: mediaPeerLink.MimeType(), - Size: mediaPeerLink.Size(), - }) - } - } - - return enclosures -} - -func getAuthor(author atomAuthor) string { - if author.Name != "" { - return strings.TrimSpace(author.Name) - } - - if author.Email != "" { - return strings.TrimSpace(author.Email) - } - - return "" -} diff --git a/reader/atom/atom_03.go b/reader/atom/atom_03.go new file mode 100644 index 0000000..7a86204 --- /dev/null +++ b/reader/atom/atom_03.go @@ -0,0 +1,163 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "encoding/base64" + "html" + "strings" + "time" + + "miniflux.app/crypto" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/date" + "miniflux.app/reader/sanitizer" + "miniflux.app/url" +) + +// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html +type atom03Feed struct { + ID string `xml:"id"` + Title atom03Text `xml:"title"` + Author atomPerson `xml:"author"` + Links atomLinks `xml:"link"` + Entries []atom03Entry `xml:"entry"` +} + +func (a *atom03Feed) Transform() *model.Feed { + feed := new(model.Feed) + feed.FeedURL = a.Links.firstLinkWithRelation("self") + feed.SiteURL = a.Links.originalLink() + feed.Title = a.Title.String() + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, entry := range a.Entries { + item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + + if item.Author == "" { + item.Author = a.Author.String() + } + + if item.Title == "" { + item.Title = item.URL + } + + feed.Entries = append(feed.Entries, item) + } + + return feed +} + +type atom03Entry struct { + ID string `xml:"id"` + Title atom03Text `xml:"title"` + Modified string `xml:"modified"` + Issued string `xml:"issued"` + Created string `xml:"created"` + Links atomLinks `xml:"link"` + Summary atom03Text `xml:"summary"` + Content atom03Text `xml:"content"` + Author atomPerson `xml:"author"` +} + +func (a *atom03Entry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = a.Links.originalLink() + entry.Date = a.entryDate() + entry.Author = a.Author.String() + entry.Hash = a.entryHash() + entry.Content = a.entryContent() + entry.Title = a.entryTitle() + return entry +} + +func (a *atom03Entry) entryTitle() string { + return sanitizer.StripTags(a.Title.String()) +} + +func (a *atom03Entry) entryContent() string { + content := a.Content.String() + if content != "" { + return content + } + + summary := a.Summary.String() + if summary != "" { + return summary + } + + return "" +} + +func (a *atom03Entry) entryDate() time.Time { + dateText := "" + for _, value := range []string{a.Issued, a.Modified, a.Created} { + if value != "" { + dateText = value + break + } + } + + if dateText != "" { + result, err := date.Parse(dateText) + if err != nil { + logger.Error("atom: %v", err) + return time.Now() + } + + return result + } + + return time.Now() +} + +func (a *atom03Entry) entryHash() string { + for _, value := range []string{a.ID, a.Links.originalLink()} { + if value != "" { + return crypto.Hash(value) + } + } + + return "" +} + +type atom03Text struct { + Type string `xml:"type,attr"` + Mode string `xml:"mode,attr"` + Data string `xml:",chardata"` + XML string `xml:",innerxml"` +} + +func (a *atom03Text) String() string { + content := "" + + switch { + case a.Mode == "xml": + content = a.XML + case a.Mode == "escaped": + content = a.Data + case a.Mode == "base64": + b, err := base64.StdEncoding.DecodeString(a.Data) + if err == nil { + content = string(b) + } + default: + content = a.Data + } + + if a.Type != "text/html" { + content = html.EscapeString(content) + } + + return strings.TrimSpace(content) +} diff --git a/reader/atom/atom_03_test.go b/reader/atom/atom_03_test.go new file mode 100644 index 0000000..063b02c --- /dev/null +++ b/reader/atom/atom_03_test.go @@ -0,0 +1,219 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "bytes" + "testing" + "time" +) + +func TestParseAtom03(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + <issued>2003-12-13T08:29:29-04:00</issued> + <modified>2003-12-13T18:30:02Z</modified> + <summary type="text/plain">It's a test</summary> + <content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "dive into mark" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } + + if feed.FeedURL != "" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } + + if feed.SiteURL != "http://diveintomark.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + tz := time.FixedZone("Test Case Time", -int((4 * time.Hour).Seconds())) + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 8, 29, 29, 0, tz)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } + + if feed.Entries[0].Hash != "b70d30334b808f32e66eb19fabb263525cecd18f205720b583e84f7f295cf728" { + t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) + } + + if feed.Entries[0].URL != "http://diveintomark.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Title != "Atom 0.3 snapshot" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } + + if feed.Entries[0].Content != "<p>HTML content</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } + + if feed.Entries[0].Author != "Mark Pilgrim" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseAtom03WithoutFeedTitle(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "http://diveintomark.org/" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } +} + +func TestParseAtom03WithoutEntryTitle(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Title != "http://diveintomark.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseAtom03WithSummaryOnly(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + <issued>2003-12-13T08:29:29-04:00</issued> + <modified>2003-12-13T18:30:02Z</modified> + <summary type="text/plain">It's a test</summary> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "It's a test" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseAtom03WithXMLContent(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + <issued>2003-12-13T08:29:29-04:00</issued> + <modified>2003-12-13T18:30:02Z</modified> + <content mode="xml" type="text/html"><p>Some text.</p></content> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseAtom03WithBase64Content(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> + <entry> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + <issued>2003-12-13T08:29:29-04:00</issued> + <modified>2003-12-13T18:30:02Z</modified> + <content mode="base64" type="text/html">PHA+U29tZSB0ZXh0LjwvcD4=</content> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go new file mode 100644 index 0000000..099cbed --- /dev/null +++ b/reader/atom/atom_10.go @@ -0,0 +1,216 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "encoding/xml" + "html" + "strconv" + "strings" + "time" + + "miniflux.app/crypto" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/date" + "miniflux.app/reader/media" + "miniflux.app/reader/sanitizer" + "miniflux.app/url" +) + +// Specs: +// https://tools.ietf.org/html/rfc4287 +// https://validator.w3.org/feed/docs/atom.html +type atom10Feed struct { + XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Author atomPerson `xml:"author"` + Links atomLinks `xml:"link"` + Entries []atom10Entry `xml:"entry"` +} + +func (a *atom10Feed) Transform() *model.Feed { + feed := new(model.Feed) + feed.FeedURL = a.Links.firstLinkWithRelation("self") + feed.SiteURL = a.Links.originalLink() + feed.Title = a.Title.String() + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, entry := range a.Entries { + item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + + if item.Author == "" { + item.Author = a.Author.String() + } + + if item.Title == "" { + item.Title = item.URL + } + + feed.Entries = append(feed.Entries, item) + } + + return feed +} + +type atom10Entry struct { + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Published string `xml:"published"` + Updated string `xml:"updated"` + Links atomLinks `xml:"link"` + Summary atom10Text `xml:"summary"` + Content atom10Text `xml:"http://www.w3.org/2005/Atom content"` + Author atomPerson `xml:"author"` + media.Element +} + +func (a *atom10Entry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = a.Links.originalLink() + entry.Date = a.entryDate() + entry.Author = a.Author.String() + entry.Hash = a.entryHash() + entry.Content = a.entryContent() + entry.Title = a.entryTitle() + entry.Enclosures = a.entryEnclosures() + entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html") + return entry +} + +func (a *atom10Entry) entryTitle() string { + return sanitizer.StripTags(a.Title.String()) +} + +func (a *atom10Entry) entryContent() string { + content := a.Content.String() + if content != "" { + return content + } + + summary := a.Summary.String() + if summary != "" { + return summary + } + + mediaDescription := a.FirstMediaDescription() + if mediaDescription != "" { + return mediaDescription + } + + return "" +} + +// Note: The published date represents the original creation date for YouTube feeds. +// Example: +// <published>2019-01-26T08:02:28+00:00</published> +// <updated>2019-01-29T07:27:27+00:00</updated> +func (a *atom10Entry) entryDate() time.Time { + dateText := a.Published + if dateText == "" { + dateText = a.Updated + } + + if dateText != "" { + result, err := date.Parse(dateText) + if err != nil { + logger.Error("atom: %v", err) + return time.Now() + } + + return result + } + + return time.Now() +} + +func (a *atom10Entry) entryHash() string { + for _, value := range []string{a.ID, a.Links.originalLink()} { + if value != "" { + return crypto.Hash(value) + } + } + + return "" +} + +func (a *atom10Entry) entryEnclosures() model.EnclosureList { + enclosures := make(model.EnclosureList, 0) + duplicates := make(map[string]bool, 0) + + for _, mediaThumbnail := range a.AllMediaThumbnails() { + if _, found := duplicates[mediaThumbnail.URL]; !found { + duplicates[mediaThumbnail.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaThumbnail.URL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } + } + + for _, link := range a.Links { + if strings.ToLower(link.Rel) == "enclosure" { + if _, found := duplicates[link.URL]; !found { + duplicates[link.URL] = true + length, _ := strconv.ParseInt(link.Length, 10, 0) + enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) + } + } + } + + for _, mediaContent := range a.AllMediaContents() { + if _, found := duplicates[mediaContent.URL]; !found { + duplicates[mediaContent.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaContent.URL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } + } + + for _, mediaPeerLink := range a.AllMediaPeerLinks() { + if _, found := duplicates[mediaPeerLink.URL]; !found { + duplicates[mediaPeerLink.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaPeerLink.URL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } + } + + return enclosures +} + +type atom10Text struct { + Type string `xml:"type,attr"` + Data string `xml:",chardata"` + XML string `xml:",innerxml"` +} + +func (a *atom10Text) String() string { + content := "" + + switch { + case a.Type == "xhtml": + content = a.XML + case a.Type == "html": + content = a.Data + case a.Type == "text" || a.Type == "": + content = html.EscapeString(a.Data) + } + + return strings.TrimSpace(content) +} diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go new file mode 100644 index 0000000..63127c4 --- /dev/null +++ b/reader/atom/atom_10_test.go @@ -0,0 +1,779 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "bytes" + "testing" + "time" +) + +func TestParseAtomSample(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + + <title>Example Feed</title> + <link href="http://example.org/"/> + <updated>2003-12-13T18:30:02Z</updated> + <author> + <name>John Doe</name> + </author> + <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> + + <entry> + <title>Atom-Powered Robots Run Amok</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "Example Feed" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } + + if feed.FeedURL != "" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } + + if feed.SiteURL != "http://example.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } + + if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" { + t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) + } + + if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].CommentsURL != "" { + t.Errorf("Incorrect entry Comments URL, got: %s", feed.Entries[0].CommentsURL) + } + + if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } + + if feed.Entries[0].Content != "Some text." { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } + + if feed.Entries[0].Author != "John Doe" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseFeedWithoutTitle(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <link rel="alternate" type="text/html" href="https://example.org/"/> + <link rel="self" type="application/atom+xml" href="https://example.org/feed"/> + <updated>2003-12-13T18:30:02Z</updated> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "https://example.org/" { + t.Errorf("Incorrect feed title, got: %s", feed.Title) + } +} + +func TestParseEntryWithoutTitle(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + + <title>Example Feed</title> + <link href="http://example.org/"/> + <updated>2003-12-13T18:30:02Z</updated> + <author> + <name>John Doe</name> + </author> + <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> + + <entry> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseFeedURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link rel="alternate" type="text/html" href="https://example.org/"/> + <link rel="self" type="application/atom+xml" href="https://example.org/feed"/> + <updated>2003-12-13T18:30:02Z</updated> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "https://example.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if feed.FeedURL != "https://example.org/feed" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } +} + +func TestParseEntryWithRelativeURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title>Test</title> + <link href="something.html"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].URL != "http://example.org/something.html" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } +} + +func TestParseEntryTitleWithWhitespaces(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title> + Some Title + </title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Some Title" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><![CDATA[Test “Test”]]></title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test “Test”" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithXHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="xhtml"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntrySummaryWithXHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="xhtml"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="xhtml"><p>Some text.</p></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="html"><![CDATA[<p>Some text.</p>]]></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithPlainText(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="text"><![CDATA[<Some text.>]]></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "<Some text.>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntryWithAuthorName(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + <author> + <name>Me</name> + <email>me@localhost</email> + </author> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Author != "Me" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseEntryWithoutAuthorName(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + <author> + <name/> + <email>me@localhost</email> + </author> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Author != "me@localhost" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseEntryWithEnclosures(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <id>http://www.example.org/myfeed</id> + <title>My Podcast Feed</title> + <updated>2005-07-15T12:00:00Z</updated> + <author> + <name>John Doe</name> + </author> + <link href="http://example.org" /> + <link rel="self" href="http://example.org/myfeed" /> + <entry> + <id>http://www.example.org/entries/1</id> + <title>Atom 1.0</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://www.example.org/entries/1" /> + <summary>An overview of Atom 1.0</summary> + <link rel="enclosure" + type="audio/mpeg" + title="MP3" + href="http://www.example.org/myaudiofile.mp3" + length="1234" /> + <link rel="enclosure" + type="application/x-bittorrent" + title="BitTorrent" + href="http://www.example.org/myaudiofile.torrent" + length="4567" /> + <content type="xhtml"> + <div xmlns="http://www.w3.org/1999/xhtml"> + <h1>Show Notes</h1> + <ul> + <li>00:01:00 -- Introduction</li> + <li>00:15:00 -- Talking about Atom 1.0</li> + <li>00:30:00 -- Wrapping up</li> + </ul> + </div> + </content> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"http://www.example.org/myaudiofile.mp3", "audio/mpeg", 1234}, + {"http://www.example.org/myaudiofile.torrent", "application/x-bittorrent", 4567}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseEntryWithPublished(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <published>2003-12-13T18:30:02Z</published> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } +} + +func TestParseEntryWithPublishedAndUpdated(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <published>2002-11-12T18:30:02Z</published> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if !feed.Entries[0].Date.Equal(time.Date(2002, time.November, 12, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } +} + +func TestParseInvalidXml(t *testing.T) { + data := `garbage` + _, err := Parse(bytes.NewBufferString(data)) + if err == nil { + t.Error("Parse should returns an error") + } +} + +func TestParseWithHTMLEntity(t *testing.T) { + data := ` + <?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + </feed> + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "Example \u00a0 Feed" { + t.Errorf(`Incorrect title, got: %q`, feed.Title) + } +} + +func TestParseWithInvalidCharacterEntity(t *testing.T) { + data := ` + <?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/a&b"/> + </feed> + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "http://example.org/a&b" { + t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) + } +} + +func TestParseMediaGroup(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> + <id>http://www.example.org/myfeed</id> + <title>My Video Feed</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://example.org" /> + <link rel="self" href="http://example.org/myfeed" /> + <entry> + <id>http://www.example.org/entries/1</id> + <title>Some Video</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://www.example.org/entries/1" /> + <media:group> + <media:title>Another title</media:title> + <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> + <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> + <media:description>Some description +A website: http://example.org/</media:description> + </media:group> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseMediaElements(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> + <id>http://www.example.org/myfeed</id> + <title>My Video Feed</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://example.org" /> + <link rel="self" href="http://example.org/myfeed" /> + <entry> + <id>http://www.example.org/entries/1</id> + <title>Some Video</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://www.example.org/entries/1" /> + <media:title>Another title</media:title> + <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> + <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> + <media:description>Some description +A website: http://example.org/</media:description> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseRepliesLinkRelation(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" + xmlns:thr="http://purl.org/syndication/thread/1.0"> + <id>http://www.example.org/myfeed</id> + <title>My Example Feed</title> + <updated>2005-07-28T12:00:00Z</updated> + <link href="http://www.example.org/myfeed" /> + <author><name>James</name></author> + <entry> + <id>tag:entries.com,2005:1</id> + <title>My original entry</title> + <updated>2006-03-01T12:12:12Z</updated> + <link href="http://www.example.org/entries/1" /> + <link rel="replies" + type="application/atom+xml" + href="http://www.example.org/mycommentsfeed.xml" + thr:count="10" thr:updated="2005-07-28T12:10:00Z" /> + <link rel="replies" + type="text/html" + href="http://www.example.org/comments.html" + thr:count="10" thr:updated="2005-07-28T12:10:00Z" /> + <summary>This is my original entry</summary> + </entry> + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].CommentsURL != "http://www.example.org/comments.html" { + t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) + } +} diff --git a/reader/atom/atom_common.go b/reader/atom/atom_common.go new file mode 100644 index 0000000..85e6b29 --- /dev/null +++ b/reader/atom/atom_common.go @@ -0,0 +1,68 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import "strings" + +type atomPerson struct { + Name string `xml:"name"` + Email string `xml:"email"` +} + +func (a *atomPerson) String() string { + name := "" + + switch { + case a.Name != "": + name = a.Name + case a.Email != "": + name = a.Email + } + + return strings.TrimSpace(name) +} + +type atomLink struct { + URL string `xml:"href,attr"` + Type string `xml:"type,attr"` + Rel string `xml:"rel,attr"` + Length string `xml:"length,attr"` +} + +type atomLinks []*atomLink + +func (a atomLinks) originalLink() string { + for _, link := range a { + if strings.ToLower(link.Rel) == "alternate" { + return strings.TrimSpace(link.URL) + } + + if link.Rel == "" && link.Type == "" { + return strings.TrimSpace(link.URL) + } + } + + return "" +} + +func (a atomLinks) firstLinkWithRelation(relation string) string { + for _, link := range a { + if strings.ToLower(link.Rel) == relation { + return strings.TrimSpace(link.URL) + } + } + + return "" +} + +func (a atomLinks) firstLinkWithRelationAndType(relation, contentType string) string { + for _, link := range a { + if strings.ToLower(link.Rel) == relation && strings.ToLower(link.Type) == contentType { + return strings.TrimSpace(link.URL) + } + } + + return "" +} diff --git a/reader/atom/parser.go b/reader/atom/parser.go index 90a84aa..9a9cb57 100644 --- a/reader/atom/parser.go +++ b/reader/atom/parser.go @@ -5,21 +5,58 @@ package atom // import "miniflux.app/reader/atom" import ( + "bytes" + "encoding/xml" "io" "miniflux.app/errors" "miniflux.app/model" - "miniflux.app/reader/xml" + xml_decoder "miniflux.app/reader/xml" ) +type atomFeed interface { + Transform() *model.Feed +} + // Parse returns a normalized feed struct from a Atom feed. -func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) { - atomFeed := new(atomFeed) - decoder := xml.NewDecoder(data) - err := decoder.Decode(atomFeed) +func Parse(r io.Reader) (*model.Feed, *errors.LocalizedError) { + var buf bytes.Buffer + tee := io.TeeReader(r, &buf) + + var rawFeed atomFeed + if getAtomFeedVersion(tee) == "0.3" { + rawFeed = new(atom03Feed) + } else { + rawFeed = new(atom10Feed) + } + + decoder := xml_decoder.NewDecoder(&buf) + err := decoder.Decode(rawFeed) if err != nil { return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err) } - return atomFeed.Transform(), nil + return rawFeed.Transform(), nil +} + +func getAtomFeedVersion(data io.Reader) string { + decoder := xml_decoder.NewDecoder(data) + for { + token, _ := decoder.Token() + if token == nil { + break + } + + if element, ok := token.(xml.StartElement); ok { + if element.Name.Local == "feed" { + for _, attr := range element.Attr { + if attr.Name.Local == "version" && attr.Value == "0.3" { + return "0.3" + } + } + return "1.0" + } + } + } + return "1.0" } diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index 8fcf7d0..fac14ef 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. +// Copyright 2019 Frédéric Guillot. All rights reserved. // Use of this source code is governed by the Apache 2.0 // license that can be found in the LICENSE file. @@ -7,10 +7,9 @@ package atom // import "miniflux.app/reader/atom" import ( "bytes" "testing" - "time" ) -func TestParseAtomSample(t *testing.T) { +func TestDetectAtom10(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> @@ -32,748 +31,32 @@ func TestParseAtomSample(t *testing.T) { </feed>` - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "Example Feed" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } - - if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].CommentsURL != "" { - t.Errorf("Incorrect entry Comments URL, got: %s", feed.Entries[0].CommentsURL) - } - - if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if feed.Entries[0].Content != "Some text." { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } - - if feed.Entries[0].Author != "John Doe" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseFeedWithoutTitle(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <link rel="alternate" type="text/html" href="https://example.org/"/> - <link rel="self" type="application/atom+xml" href="https://example.org/feed"/> - <updated>2003-12-13T18:30:02Z</updated> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "https://example.org/" { - t.Errorf("Incorrect feed title, got: %s", feed.Title) - } -} - -func TestParseEntryWithoutTitle(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - - <title>Example Feed</title> - <link href="http://example.org/"/> - <updated>2003-12-13T18:30:02Z</updated> - <author> - <name>John Doe</name> - </author> - <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> - - <entry> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseFeedURL(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link rel="alternate" type="text/html" href="https://example.org/"/> - <link rel="self" type="application/atom+xml" href="https://example.org/feed"/> - <updated>2003-12-13T18:30:02Z</updated> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.SiteURL != "https://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if feed.FeedURL != "https://example.org/feed" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } -} - -func TestParseEntryWithRelativeURL(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title>Test</title> - <link href="something.html"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].URL != "http://example.org/something.html" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } -} - -func TestParseEntryTitleWithWhitespaces(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title> - Some Title - </title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Some Title" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="html"><![CDATA[Test “Test”]]></title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test “Test”" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithHTML(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="html"><code>Test</code> Test</title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test Test" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithXHTML(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="xhtml"><code>Test</code> Test</title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test Test" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntrySummaryWithXHTML(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="xhtml"><code>Test</code> Test</title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary type="xhtml"><p>Some text.</p></summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "<p>Some text.</p>" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntrySummaryWithHTML(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="html"><code>Test</code> Test</title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary type="html"><![CDATA[<p>Some text.</p>]]></summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "<p>Some text.</p>" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntrySummaryWithPlainText(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <title type="html"><code>Test</code> Test</title> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary type="text"><![CDATA[<Some text.>]]></summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "<Some text.>" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntryWithAuthorName(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - <author> - <name>Me</name> - <email>me@localhost</email> - </author> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Author != "Me" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + version := getAtomFeedVersion(bytes.NewBufferString(data)) + if version != "1.0" { + t.Errorf(`Invalid Atom version detected: %s`, version) } } -func TestParseEntryWithoutAuthorName(t *testing.T) { +func TestDetectAtom03(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - <author> - <name/> - <email>me@localhost</email> - </author> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Author != "me@localhost" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithEnclosures(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <id>http://www.example.org/myfeed</id> - <title>My Podcast Feed</title> - <updated>2005-07-15T12:00:00Z</updated> - <author> - <name>John Doe</name> - </author> - <link href="http://example.org" /> - <link rel="self" href="http://example.org/myfeed" /> + <feed version="0.3" xmlns="http://purl.org/atom/ns#"> + <title>dive into mark</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/"/> + <modified>2003-12-13T18:30:02Z</modified> + <author><name>Mark Pilgrim</name></author> <entry> - <id>http://www.example.org/entries/1</id> - <title>Atom 1.0</title> - <updated>2005-07-15T12:00:00Z</updated> - <link href="http://www.example.org/entries/1" /> - <summary>An overview of Atom 1.0</summary> - <link rel="enclosure" - type="audio/mpeg" - title="MP3" - href="http://www.example.org/myaudiofile.mp3" - length="1234" /> - <link rel="enclosure" - type="application/x-bittorrent" - title="BitTorrent" - href="http://www.example.org/myaudiofile.torrent" - length="4567" /> - <content type="xhtml"> - <div xmlns="http://www.w3.org/1999/xhtml"> - <h1>Show Notes</h1> - <ul> - <li>00:01:00 -- Introduction</li> - <li>00:15:00 -- Talking about Atom 1.0</li> - <li>00:30:00 -- Wrapping up</li> - </ul> - </div> - </content> + <title>Atom 0.3 snapshot</title> + <link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/> + <id>tag:diveintomark.org,2003:3.2397</id> + <issued>2003-12-13T08:29:29-04:00</issued> + <modified>2003-12-13T18:30:02Z</modified> + <summary type="text/plain">This is a test</summary> + <content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content> </entry> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"http://www.example.org/myaudiofile.mp3", "audio/mpeg", 1234}, - {"http://www.example.org/myaudiofile.torrent", "application/x-bittorrent", 4567}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseEntryWithPublished(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <published>2003-12-13T18:30:02Z</published> - <summary>Some text.</summary> - </entry> - - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } -} - -func TestParseEntryWithPublishedAndUpdated(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - - <entry> - <link href="http://example.org/2003/12/13/atom03"/> - <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> - <published>2002-11-12T18:30:02Z</published> - <updated>2003-12-13T18:30:02Z</updated> - <summary>Some text.</summary> - </entry> - </feed>` - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if !feed.Entries[0].Date.Equal(time.Date(2002, time.November, 12, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } -} - -func TestParseInvalidXml(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an error") - } -} - -func TestParseWithHTMLEntity(t *testing.T) { - data := ` - <?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/"/> - </feed> - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "Example \u00a0 Feed" { - t.Errorf(`Incorrect title, got: %q`, feed.Title) - } -} - -func TestParseWithInvalidCharacterEntity(t *testing.T) { - data := ` - <?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom"> - <title>Example Feed</title> - <link href="http://example.org/a&b"/> - </feed> - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.SiteURL != "http://example.org/a&b" { - t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) - } -} - -func TestParseMediaGroup(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> - <id>http://www.example.org/myfeed</id> - <title>My Video Feed</title> - <updated>2005-07-15T12:00:00Z</updated> - <link href="http://example.org" /> - <link rel="self" href="http://example.org/myfeed" /> - <entry> - <id>http://www.example.org/entries/1</id> - <title>Some Video</title> - <updated>2005-07-15T12:00:00Z</updated> - <link href="http://www.example.org/entries/1" /> - <media:group> - <media:title>Another title</media:title> - <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> - <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> - <media:description>Some description -A website: http://example.org/</media:description> - </media:group> - </entry> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, - {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseMediaElements(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> - <id>http://www.example.org/myfeed</id> - <title>My Video Feed</title> - <updated>2005-07-15T12:00:00Z</updated> - <link href="http://example.org" /> - <link rel="self" href="http://example.org/myfeed" /> - <entry> - <id>http://www.example.org/entries/1</id> - <title>Some Video</title> - <updated>2005-07-15T12:00:00Z</updated> - <link href="http://www.example.org/entries/1" /> - <media:title>Another title</media:title> - <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> - <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> - <media:description>Some description -A website: http://example.org/</media:description> - </entry> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, - {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseRepliesLinkRelation(t *testing.T) { - data := `<?xml version="1.0" encoding="utf-8"?> - <feed xmlns="http://www.w3.org/2005/Atom" - xmlns:thr="http://purl.org/syndication/thread/1.0"> - <id>http://www.example.org/myfeed</id> - <title>My Example Feed</title> - <updated>2005-07-28T12:00:00Z</updated> - <link href="http://www.example.org/myfeed" /> - <author><name>James</name></author> - <entry> - <id>tag:entries.com,2005:1</id> - <title>My original entry</title> - <updated>2006-03-01T12:12:12Z</updated> - <link href="http://www.example.org/entries/1" /> - <link rel="replies" - type="application/atom+xml" - href="http://www.example.org/mycommentsfeed.xml" - thr:count="10" thr:updated="2005-07-28T12:10:00Z" /> - <link rel="replies" - type="text/html" - href="http://www.example.org/comments.html" - thr:count="10" thr:updated="2005-07-28T12:10:00Z" /> - <summary>This is my original entry</summary> - </entry> - </feed>` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].CommentsURL != "http://www.example.org/comments.html" { - t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) + version := getAtomFeedVersion(bytes.NewBufferString(data)) + if version != "0.3" { + t.Errorf(`Invalid Atom version detected: %s`, version) } } diff --git a/reader/parser/format_test.go b/reader/parser/format_test.go index 3795541..b2836b6 100644 --- a/reader/parser/format_test.go +++ b/reader/parser/format_test.go @@ -26,7 +26,7 @@ func TestDetectRSS(t *testing.T) { } } -func TestDetectAtom(t *testing.T) { +func TestDetectAtom10(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>` format := DetectFeedFormat(data) @@ -35,6 +35,15 @@ func TestDetectAtom(t *testing.T) { } } +func TestDetectAtom03(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en"></feed>` + format := DetectFeedFormat(data) + + if format != FormatAtom { + t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom) + } +} + func TestDetectAtomWithISOCharset(t *testing.T) { data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>` format := DetectFeedFormat(data) |