From 33fdb2c489727f8af972d69506fef02c121c4fd0 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Sun, 22 Dec 2019 22:18:21 -0800 Subject: Add support for Atom 0.3 --- reader/atom/atom.go | 273 --------------- reader/atom/atom_03.go | 163 +++++++++ reader/atom/atom_03_test.go | 219 ++++++++++++ reader/atom/atom_10.go | 216 ++++++++++++ reader/atom/atom_10_test.go | 779 +++++++++++++++++++++++++++++++++++++++++++ reader/atom/atom_common.go | 68 ++++ reader/atom/parser.go | 49 ++- reader/atom/parser_test.go | 759 ++--------------------------------------- reader/parser/format_test.go | 11 +- 9 files changed, 1519 insertions(+), 1018 deletions(-) delete mode 100644 reader/atom/atom.go create mode 100644 reader/atom/atom_03.go create mode 100644 reader/atom/atom_03_test.go create mode 100644 reader/atom/atom_10.go create mode 100644 reader/atom/atom_10_test.go create mode 100644 reader/atom/atom_common.go diff --git a/reader/atom/atom.go b/reader/atom/atom.go deleted file mode 100644 index 677f69a..0000000 --- a/reader/atom/atom.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package atom // import "miniflux.app/reader/atom" - -import ( - "encoding/xml" - "html" - "strconv" - "strings" - "time" - - "miniflux.app/crypto" - "miniflux.app/logger" - "miniflux.app/model" - "miniflux.app/reader/date" - "miniflux.app/reader/media" - "miniflux.app/reader/sanitizer" - "miniflux.app/url" -) - -type atomFeed struct { - XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` - ID string `xml:"id"` - Title string `xml:"title"` - Author atomAuthor `xml:"author"` - Links []atomLink `xml:"link"` - Entries []atomEntry `xml:"entry"` -} - -type atomEntry struct { - ID string `xml:"id"` - Title atomContent `xml:"title"` - Published string `xml:"published"` - Updated string `xml:"updated"` - Links []atomLink `xml:"link"` - Summary atomContent `xml:"summary"` - Content atomContent `xml:"http://www.w3.org/2005/Atom content"` - Author atomAuthor `xml:"author"` - media.Element -} - -type atomAuthor struct { - Name string `xml:"name"` - Email string `xml:"email"` -} - -type atomLink struct { - URL string `xml:"href,attr"` - Type string `xml:"type,attr"` - Rel string `xml:"rel,attr"` - Length string `xml:"length,attr"` -} - -type atomContent struct { - Type string `xml:"type,attr"` - Data string `xml:",chardata"` - XML string `xml:",innerxml"` -} - -func (a *atomFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.FeedURL = getRelationURL(a.Links, "self") - feed.SiteURL = getURL(a.Links) - feed.Title = strings.TrimSpace(a.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, entry := range a.Entries { - item := entry.Transform() - entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) - if err == nil { - item.URL = entryURL - } - - if item.Author == "" { - item.Author = getAuthor(a.Author) - } - - if item.Title == "" { - item.Title = item.URL - } - - feed.Entries = append(feed.Entries, item) - } - - return feed -} - -func (a *atomEntry) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = getURL(a.Links) - entry.Date = getDate(a) - entry.Author = getAuthor(a.Author) - entry.Hash = getHash(a) - entry.Content = getContent(a) - entry.Title = getTitle(a) - entry.Enclosures = getEnclosures(a) - entry.CommentsURL = getRelationURLWithType(a.Links, "replies", "text/html") - return entry -} - -func getURL(links []atomLink) string { - for _, link := range links { - if strings.ToLower(link.Rel) == "alternate" { - return strings.TrimSpace(link.URL) - } - - if link.Rel == "" && link.Type == "" { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getRelationURL(links []atomLink, relation string) string { - for _, link := range links { - if strings.ToLower(link.Rel) == relation { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getRelationURLWithType(links []atomLink, relation, contentType string) string { - for _, link := range links { - if strings.ToLower(link.Rel) == relation && strings.ToLower(link.Type) == contentType { - return strings.TrimSpace(link.URL) - } - } - - return "" -} - -func getDate(a *atomEntry) time.Time { - // Note: The published date represents the original creation date for YouTube feeds. - // Example: - // 2019-01-26T08:02:28+00:00 - // 2019-01-29T07:27:27+00:00 - dateText := a.Published - if dateText == "" { - dateText = a.Updated - } - - if dateText != "" { - result, err := date.Parse(dateText) - if err != nil { - logger.Error("atom: %v", err) - return time.Now() - } - - return result - } - - return time.Now() -} - -func atomContentToString(c atomContent) string { - if c.Type == "xhtml" { - return c.XML - } - - if c.Type == "html" { - return c.Data - } - - if c.Type == "text" || c.Type == "" { - return html.EscapeString(c.Data) - } - - return "" -} - -func getContent(a *atomEntry) string { - r := atomContentToString(a.Content) - if r != "" { - return r - } - - r = atomContentToString(a.Summary) - if r != "" { - return r - } - - mediaDescription := a.FirstMediaDescription() - if mediaDescription != "" { - return mediaDescription - } - - return "" -} - -func getTitle(a *atomEntry) string { - title := atomContentToString(a.Title) - return strings.TrimSpace(sanitizer.StripTags(title)) -} - -func getHash(a *atomEntry) string { - for _, value := range []string{a.ID, getURL(a.Links)} { - if value != "" { - return crypto.Hash(value) - } - } - - return "" -} - -func getEnclosures(a *atomEntry) model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - duplicates := make(map[string]bool, 0) - - for _, mediaThumbnail := range a.AllMediaThumbnails() { - if _, found := duplicates[mediaThumbnail.URL]; !found { - duplicates[mediaThumbnail.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaThumbnail.URL, - MimeType: mediaThumbnail.MimeType(), - Size: mediaThumbnail.Size(), - }) - } - } - - for _, link := range a.Links { - if strings.ToLower(link.Rel) == "enclosure" { - if _, found := duplicates[link.URL]; !found { - duplicates[link.URL] = true - length, _ := strconv.ParseInt(link.Length, 10, 0) - enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) - } - } - } - - for _, mediaContent := range a.AllMediaContents() { - if _, found := duplicates[mediaContent.URL]; !found { - duplicates[mediaContent.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaContent.URL, - MimeType: mediaContent.MimeType(), - Size: mediaContent.Size(), - }) - } - } - - for _, mediaPeerLink := range a.AllMediaPeerLinks() { - if _, found := duplicates[mediaPeerLink.URL]; !found { - duplicates[mediaPeerLink.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaPeerLink.URL, - MimeType: mediaPeerLink.MimeType(), - Size: mediaPeerLink.Size(), - }) - } - } - - return enclosures -} - -func getAuthor(author atomAuthor) string { - if author.Name != "" { - return strings.TrimSpace(author.Name) - } - - if author.Email != "" { - return strings.TrimSpace(author.Email) - } - - return "" -} diff --git a/reader/atom/atom_03.go b/reader/atom/atom_03.go new file mode 100644 index 0000000..7a86204 --- /dev/null +++ b/reader/atom/atom_03.go @@ -0,0 +1,163 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "encoding/base64" + "html" + "strings" + "time" + + "miniflux.app/crypto" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/date" + "miniflux.app/reader/sanitizer" + "miniflux.app/url" +) + +// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html +type atom03Feed struct { + ID string `xml:"id"` + Title atom03Text `xml:"title"` + Author atomPerson `xml:"author"` + Links atomLinks `xml:"link"` + Entries []atom03Entry `xml:"entry"` +} + +func (a *atom03Feed) Transform() *model.Feed { + feed := new(model.Feed) + feed.FeedURL = a.Links.firstLinkWithRelation("self") + feed.SiteURL = a.Links.originalLink() + feed.Title = a.Title.String() + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, entry := range a.Entries { + item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + + if item.Author == "" { + item.Author = a.Author.String() + } + + if item.Title == "" { + item.Title = item.URL + } + + feed.Entries = append(feed.Entries, item) + } + + return feed +} + +type atom03Entry struct { + ID string `xml:"id"` + Title atom03Text `xml:"title"` + Modified string `xml:"modified"` + Issued string `xml:"issued"` + Created string `xml:"created"` + Links atomLinks `xml:"link"` + Summary atom03Text `xml:"summary"` + Content atom03Text `xml:"content"` + Author atomPerson `xml:"author"` +} + +func (a *atom03Entry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = a.Links.originalLink() + entry.Date = a.entryDate() + entry.Author = a.Author.String() + entry.Hash = a.entryHash() + entry.Content = a.entryContent() + entry.Title = a.entryTitle() + return entry +} + +func (a *atom03Entry) entryTitle() string { + return sanitizer.StripTags(a.Title.String()) +} + +func (a *atom03Entry) entryContent() string { + content := a.Content.String() + if content != "" { + return content + } + + summary := a.Summary.String() + if summary != "" { + return summary + } + + return "" +} + +func (a *atom03Entry) entryDate() time.Time { + dateText := "" + for _, value := range []string{a.Issued, a.Modified, a.Created} { + if value != "" { + dateText = value + break + } + } + + if dateText != "" { + result, err := date.Parse(dateText) + if err != nil { + logger.Error("atom: %v", err) + return time.Now() + } + + return result + } + + return time.Now() +} + +func (a *atom03Entry) entryHash() string { + for _, value := range []string{a.ID, a.Links.originalLink()} { + if value != "" { + return crypto.Hash(value) + } + } + + return "" +} + +type atom03Text struct { + Type string `xml:"type,attr"` + Mode string `xml:"mode,attr"` + Data string `xml:",chardata"` + XML string `xml:",innerxml"` +} + +func (a *atom03Text) String() string { + content := "" + + switch { + case a.Mode == "xml": + content = a.XML + case a.Mode == "escaped": + content = a.Data + case a.Mode == "base64": + b, err := base64.StdEncoding.DecodeString(a.Data) + if err == nil { + content = string(b) + } + default: + content = a.Data + } + + if a.Type != "text/html" { + content = html.EscapeString(content) + } + + return strings.TrimSpace(content) +} diff --git a/reader/atom/atom_03_test.go b/reader/atom/atom_03_test.go new file mode 100644 index 0000000..063b02c --- /dev/null +++ b/reader/atom/atom_03_test.go @@ -0,0 +1,219 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "bytes" + "testing" + "time" +) + +func TestParseAtom03(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z + It's a test + HTML content

]]>
+
+
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "dive into mark" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } + + if feed.FeedURL != "" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } + + if feed.SiteURL != "http://diveintomark.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + tz := time.FixedZone("Test Case Time", -int((4 * time.Hour).Seconds())) + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 8, 29, 29, 0, tz)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } + + if feed.Entries[0].Hash != "b70d30334b808f32e66eb19fabb263525cecd18f205720b583e84f7f295cf728" { + t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) + } + + if feed.Entries[0].URL != "http://diveintomark.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Title != "Atom 0.3 snapshot" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } + + if feed.Entries[0].Content != "

HTML content

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } + + if feed.Entries[0].Author != "Mark Pilgrim" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseAtom03WithoutFeedTitle(t *testing.T) { + data := ` + + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "http://diveintomark.org/" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } +} + +func TestParseAtom03WithoutEntryTitle(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + + tag:diveintomark.org,2003:3.2397 + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Title != "http://diveintomark.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseAtom03WithSummaryOnly(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z + It's a test + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "It's a test" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseAtom03WithXMLContent(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z +

Some text.

+
+
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseAtom03WithBase64Content(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z + PHA+U29tZSB0ZXh0LjwvcD4= + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go new file mode 100644 index 0000000..099cbed --- /dev/null +++ b/reader/atom/atom_10.go @@ -0,0 +1,216 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "encoding/xml" + "html" + "strconv" + "strings" + "time" + + "miniflux.app/crypto" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/date" + "miniflux.app/reader/media" + "miniflux.app/reader/sanitizer" + "miniflux.app/url" +) + +// Specs: +// https://tools.ietf.org/html/rfc4287 +// https://validator.w3.org/feed/docs/atom.html +type atom10Feed struct { + XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Author atomPerson `xml:"author"` + Links atomLinks `xml:"link"` + Entries []atom10Entry `xml:"entry"` +} + +func (a *atom10Feed) Transform() *model.Feed { + feed := new(model.Feed) + feed.FeedURL = a.Links.firstLinkWithRelation("self") + feed.SiteURL = a.Links.originalLink() + feed.Title = a.Title.String() + + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, entry := range a.Entries { + item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + + if item.Author == "" { + item.Author = a.Author.String() + } + + if item.Title == "" { + item.Title = item.URL + } + + feed.Entries = append(feed.Entries, item) + } + + return feed +} + +type atom10Entry struct { + ID string `xml:"id"` + Title atom10Text `xml:"title"` + Published string `xml:"published"` + Updated string `xml:"updated"` + Links atomLinks `xml:"link"` + Summary atom10Text `xml:"summary"` + Content atom10Text `xml:"http://www.w3.org/2005/Atom content"` + Author atomPerson `xml:"author"` + media.Element +} + +func (a *atom10Entry) Transform() *model.Entry { + entry := new(model.Entry) + entry.URL = a.Links.originalLink() + entry.Date = a.entryDate() + entry.Author = a.Author.String() + entry.Hash = a.entryHash() + entry.Content = a.entryContent() + entry.Title = a.entryTitle() + entry.Enclosures = a.entryEnclosures() + entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html") + return entry +} + +func (a *atom10Entry) entryTitle() string { + return sanitizer.StripTags(a.Title.String()) +} + +func (a *atom10Entry) entryContent() string { + content := a.Content.String() + if content != "" { + return content + } + + summary := a.Summary.String() + if summary != "" { + return summary + } + + mediaDescription := a.FirstMediaDescription() + if mediaDescription != "" { + return mediaDescription + } + + return "" +} + +// Note: The published date represents the original creation date for YouTube feeds. +// Example: +// 2019-01-26T08:02:28+00:00 +// 2019-01-29T07:27:27+00:00 +func (a *atom10Entry) entryDate() time.Time { + dateText := a.Published + if dateText == "" { + dateText = a.Updated + } + + if dateText != "" { + result, err := date.Parse(dateText) + if err != nil { + logger.Error("atom: %v", err) + return time.Now() + } + + return result + } + + return time.Now() +} + +func (a *atom10Entry) entryHash() string { + for _, value := range []string{a.ID, a.Links.originalLink()} { + if value != "" { + return crypto.Hash(value) + } + } + + return "" +} + +func (a *atom10Entry) entryEnclosures() model.EnclosureList { + enclosures := make(model.EnclosureList, 0) + duplicates := make(map[string]bool, 0) + + for _, mediaThumbnail := range a.AllMediaThumbnails() { + if _, found := duplicates[mediaThumbnail.URL]; !found { + duplicates[mediaThumbnail.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaThumbnail.URL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } + } + + for _, link := range a.Links { + if strings.ToLower(link.Rel) == "enclosure" { + if _, found := duplicates[link.URL]; !found { + duplicates[link.URL] = true + length, _ := strconv.ParseInt(link.Length, 10, 0) + enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) + } + } + } + + for _, mediaContent := range a.AllMediaContents() { + if _, found := duplicates[mediaContent.URL]; !found { + duplicates[mediaContent.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaContent.URL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } + } + + for _, mediaPeerLink := range a.AllMediaPeerLinks() { + if _, found := duplicates[mediaPeerLink.URL]; !found { + duplicates[mediaPeerLink.URL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaPeerLink.URL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } + } + + return enclosures +} + +type atom10Text struct { + Type string `xml:"type,attr"` + Data string `xml:",chardata"` + XML string `xml:",innerxml"` +} + +func (a *atom10Text) String() string { + content := "" + + switch { + case a.Type == "xhtml": + content = a.XML + case a.Type == "html": + content = a.Data + case a.Type == "text" || a.Type == "": + content = html.EscapeString(a.Data) + } + + return strings.TrimSpace(content) +} diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go new file mode 100644 index 0000000..63127c4 --- /dev/null +++ b/reader/atom/atom_10_test.go @@ -0,0 +1,779 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import ( + "bytes" + "testing" + "time" +) + +func TestParseAtomSample(t *testing.T) { + data := ` + + + Example Feed + + 2003-12-13T18:30:02Z + + John Doe + + urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 + + + Atom-Powered Robots Run Amok + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "Example Feed" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } + + if feed.FeedURL != "" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } + + if feed.SiteURL != "http://example.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } + + if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" { + t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) + } + + if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].CommentsURL != "" { + t.Errorf("Incorrect entry Comments URL, got: %s", feed.Entries[0].CommentsURL) + } + + if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } + + if feed.Entries[0].Content != "Some text." { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } + + if feed.Entries[0].Author != "John Doe" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseFeedWithoutTitle(t *testing.T) { + data := ` + + + + 2003-12-13T18:30:02Z + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "https://example.org/" { + t.Errorf("Incorrect feed title, got: %s", feed.Title) + } +} + +func TestParseEntryWithoutTitle(t *testing.T) { + data := ` + + + Example Feed + + 2003-12-13T18:30:02Z + + John Doe + + urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseFeedURL(t *testing.T) { + data := ` + + Example Feed + + + 2003-12-13T18:30:02Z + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "https://example.org/" { + t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) + } + + if feed.FeedURL != "https://example.org/feed" { + t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) + } +} + +func TestParseEntryWithRelativeURL(t *testing.T) { + data := ` + + Example Feed + + + + Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].URL != "http://example.org/something.html" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } +} + +func TestParseEntryTitleWithWhitespaces(t *testing.T) { + data := ` + + Example Feed + + + + + Some Title + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Some Title" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { + data := ` + + Example Feed + + + + <![CDATA[Test “Test”]]> + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test “Test”" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithXHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntrySummaryWithXHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z +

Some text.

+
+ +
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text.

]]>
+
+ +
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithPlainText(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + ]]> + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "<Some text.>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntryWithAuthorName(t *testing.T) { + data := ` + + Example Feed + + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + Me + me@localhost + + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Author != "Me" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseEntryWithoutAuthorName(t *testing.T) { + data := ` + + Example Feed + + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + me@localhost + + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Author != "me@localhost" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + +func TestParseEntryWithEnclosures(t *testing.T) { + data := ` + + http://www.example.org/myfeed + My Podcast Feed + 2005-07-15T12:00:00Z + + John Doe + + + + + http://www.example.org/entries/1 + Atom 1.0 + 2005-07-15T12:00:00Z + + An overview of Atom 1.0 + + + +
+

Show Notes

+
    +
  • 00:01:00 -- Introduction
  • +
  • 00:15:00 -- Talking about Atom 1.0
  • +
  • 00:30:00 -- Wrapping up
  • +
+
+
+
+
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"http://www.example.org/myaudiofile.mp3", "audio/mpeg", 1234}, + {"http://www.example.org/myaudiofile.torrent", "application/x-bittorrent", 4567}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseEntryWithPublished(t *testing.T) { + data := ` + + Example Feed + + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } +} + +func TestParseEntryWithPublishedAndUpdated(t *testing.T) { + data := ` + + Example Feed + + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2002-11-12T18:30:02Z + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if !feed.Entries[0].Date.Equal(time.Date(2002, time.November, 12, 18, 30, 2, 0, time.UTC)) { + t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) + } +} + +func TestParseInvalidXml(t *testing.T) { + data := `garbage` + _, err := Parse(bytes.NewBufferString(data)) + if err == nil { + t.Error("Parse should returns an error") + } +} + +func TestParseWithHTMLEntity(t *testing.T) { + data := ` + + + Example   Feed + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != "Example \u00a0 Feed" { + t.Errorf(`Incorrect title, got: %q`, feed.Title) + } +} + +func TestParseWithInvalidCharacterEntity(t *testing.T) { + data := ` + + + Example Feed + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "http://example.org/a&b" { + t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) + } +} + +func TestParseMediaGroup(t *testing.T) { + data := ` + + http://www.example.org/myfeed + My Video Feed + 2005-07-15T12:00:00Z + + + + http://www.example.org/entries/1 + Some Video + 2005-07-15T12:00:00Z + + + Another title + + + Some description +A website: http://example.org/ + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Content != `Some description
A website: http://example.org/` { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseMediaElements(t *testing.T) { + data := ` + + http://www.example.org/myfeed + My Video Feed + 2005-07-15T12:00:00Z + + + + http://www.example.org/entries/1 + Some Video + 2005-07-15T12:00:00Z + + Another title + + + Some description +A website: http://example.org/ + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].Content != `Some description
A website: http://example.org/` { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + expectedResults := []struct { + url string + mimeType string + size int64 + }{ + {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + } + + for index, enclosure := range feed.Entries[0].Enclosures { + if expectedResults[index].url != enclosure.URL { + t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) + } + + if expectedResults[index].mimeType != enclosure.MimeType { + t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) + } + + if expectedResults[index].size != enclosure.Size { + t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) + } + } +} + +func TestParseRepliesLinkRelation(t *testing.T) { + data := ` + + http://www.example.org/myfeed + My Example Feed + 2005-07-28T12:00:00Z + + James + + tag:entries.com,2005:1 + My original entry + 2006-03-01T12:12:12Z + + + + This is my original entry + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if feed.Entries[0].CommentsURL != "http://www.example.org/comments.html" { + t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) + } +} diff --git a/reader/atom/atom_common.go b/reader/atom/atom_common.go new file mode 100644 index 0000000..85e6b29 --- /dev/null +++ b/reader/atom/atom_common.go @@ -0,0 +1,68 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package atom // import "miniflux.app/reader/atom" + +import "strings" + +type atomPerson struct { + Name string `xml:"name"` + Email string `xml:"email"` +} + +func (a *atomPerson) String() string { + name := "" + + switch { + case a.Name != "": + name = a.Name + case a.Email != "": + name = a.Email + } + + return strings.TrimSpace(name) +} + +type atomLink struct { + URL string `xml:"href,attr"` + Type string `xml:"type,attr"` + Rel string `xml:"rel,attr"` + Length string `xml:"length,attr"` +} + +type atomLinks []*atomLink + +func (a atomLinks) originalLink() string { + for _, link := range a { + if strings.ToLower(link.Rel) == "alternate" { + return strings.TrimSpace(link.URL) + } + + if link.Rel == "" && link.Type == "" { + return strings.TrimSpace(link.URL) + } + } + + return "" +} + +func (a atomLinks) firstLinkWithRelation(relation string) string { + for _, link := range a { + if strings.ToLower(link.Rel) == relation { + return strings.TrimSpace(link.URL) + } + } + + return "" +} + +func (a atomLinks) firstLinkWithRelationAndType(relation, contentType string) string { + for _, link := range a { + if strings.ToLower(link.Rel) == relation && strings.ToLower(link.Type) == contentType { + return strings.TrimSpace(link.URL) + } + } + + return "" +} diff --git a/reader/atom/parser.go b/reader/atom/parser.go index 90a84aa..9a9cb57 100644 --- a/reader/atom/parser.go +++ b/reader/atom/parser.go @@ -5,21 +5,58 @@ package atom // import "miniflux.app/reader/atom" import ( + "bytes" + "encoding/xml" "io" "miniflux.app/errors" "miniflux.app/model" - "miniflux.app/reader/xml" + xml_decoder "miniflux.app/reader/xml" ) +type atomFeed interface { + Transform() *model.Feed +} + // Parse returns a normalized feed struct from a Atom feed. -func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) { - atomFeed := new(atomFeed) - decoder := xml.NewDecoder(data) - err := decoder.Decode(atomFeed) +func Parse(r io.Reader) (*model.Feed, *errors.LocalizedError) { + var buf bytes.Buffer + tee := io.TeeReader(r, &buf) + + var rawFeed atomFeed + if getAtomFeedVersion(tee) == "0.3" { + rawFeed = new(atom03Feed) + } else { + rawFeed = new(atom10Feed) + } + + decoder := xml_decoder.NewDecoder(&buf) + err := decoder.Decode(rawFeed) if err != nil { return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err) } - return atomFeed.Transform(), nil + return rawFeed.Transform(), nil +} + +func getAtomFeedVersion(data io.Reader) string { + decoder := xml_decoder.NewDecoder(data) + for { + token, _ := decoder.Token() + if token == nil { + break + } + + if element, ok := token.(xml.StartElement); ok { + if element.Name.Local == "feed" { + for _, attr := range element.Attr { + if attr.Name.Local == "version" && attr.Value == "0.3" { + return "0.3" + } + } + return "1.0" + } + } + } + return "1.0" } diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index 8fcf7d0..fac14ef 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. +// Copyright 2019 Frédéric Guillot. All rights reserved. // Use of this source code is governed by the Apache 2.0 // license that can be found in the LICENSE file. @@ -7,10 +7,9 @@ package atom // import "miniflux.app/reader/atom" import ( "bytes" "testing" - "time" ) -func TestParseAtomSample(t *testing.T) { +func TestDetectAtom10(t *testing.T) { data := ` @@ -32,748 +31,32 @@ func TestParseAtomSample(t *testing.T) { ` - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "Example Feed" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } - - if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].CommentsURL != "" { - t.Errorf("Incorrect entry Comments URL, got: %s", feed.Entries[0].CommentsURL) - } - - if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if feed.Entries[0].Content != "Some text." { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } - - if feed.Entries[0].Author != "John Doe" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseFeedWithoutTitle(t *testing.T) { - data := ` - - - - 2003-12-13T18:30:02Z - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "https://example.org/" { - t.Errorf("Incorrect feed title, got: %s", feed.Title) - } -} - -func TestParseEntryWithoutTitle(t *testing.T) { - data := ` - - - Example Feed - - 2003-12-13T18:30:02Z - - John Doe - - urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseFeedURL(t *testing.T) { - data := ` - - Example Feed - - - 2003-12-13T18:30:02Z - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.SiteURL != "https://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if feed.FeedURL != "https://example.org/feed" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } -} - -func TestParseEntryWithRelativeURL(t *testing.T) { - data := ` - - Example Feed - - - - Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].URL != "http://example.org/something.html" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } -} - -func TestParseEntryTitleWithWhitespaces(t *testing.T) { - data := ` - - Example Feed - - - - - Some Title - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Some Title" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { - data := ` - - Example Feed - - - - <![CDATA[Test “Test”]]> - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test “Test”" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithHTML(t *testing.T) { - data := ` - - Example Feed - - - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test Test" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryTitleWithXHTML(t *testing.T) { - data := ` - - Example Feed - - - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test Test" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntrySummaryWithXHTML(t *testing.T) { - data := ` - - Example Feed - - - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z -

Some text.

-
- -
` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "

Some text.

" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntrySummaryWithHTML(t *testing.T) { - data := ` - - Example Feed - - - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text.

]]>
-
- -
` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "

Some text.

" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntrySummaryWithPlainText(t *testing.T) { - data := ` - - Example Feed - - - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - ]]> - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "<Some text.>" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntryWithAuthorName(t *testing.T) { - data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - Me - me@localhost - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Author != "Me" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + version := getAtomFeedVersion(bytes.NewBufferString(data)) + if version != "1.0" { + t.Errorf(`Invalid Atom version detected: %s`, version) } } -func TestParseEntryWithoutAuthorName(t *testing.T) { +func TestDetectAtom03(t *testing.T) { data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - me@localhost - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Author != "me@localhost" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithEnclosures(t *testing.T) { - data := ` - - http://www.example.org/myfeed - My Podcast Feed - 2005-07-15T12:00:00Z - - John Doe - - - + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim - http://www.example.org/entries/1 - Atom 1.0 - 2005-07-15T12:00:00Z - - An overview of Atom 1.0 - - - -
-

Show Notes

-
    -
  • 00:01:00 -- Introduction
  • -
  • 00:15:00 -- Talking about Atom 1.0
  • -
  • 00:30:00 -- Wrapping up
  • -
-
-
+ Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z + This is a test + HTML content

]]>
-
` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"http://www.example.org/myaudiofile.mp3", "audio/mpeg", 1234}, - {"http://www.example.org/myaudiofile.torrent", "application/x-bittorrent", 4567}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseEntryWithPublished(t *testing.T) { - data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } -} - -func TestParseEntryWithPublishedAndUpdated(t *testing.T) { - data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2002-11-12T18:30:02Z - 2003-12-13T18:30:02Z - Some text. - - ` - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if !feed.Entries[0].Date.Equal(time.Date(2002, time.November, 12, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } -} - -func TestParseInvalidXml(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an error") - } -} - -func TestParseWithHTMLEntity(t *testing.T) { - data := ` - - - Example   Feed - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Title != "Example \u00a0 Feed" { - t.Errorf(`Incorrect title, got: %q`, feed.Title) - } -} - -func TestParseWithInvalidCharacterEntity(t *testing.T) { - data := ` - - - Example Feed - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.SiteURL != "http://example.org/a&b" { - t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) - } -} - -func TestParseMediaGroup(t *testing.T) { - data := ` - - http://www.example.org/myfeed - My Video Feed - 2005-07-15T12:00:00Z - - - - http://www.example.org/entries/1 - Some Video - 2005-07-15T12:00:00Z - - - Another title - - - Some description -A website: http://example.org/ - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description
A website: http://example.org/` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, - {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseMediaElements(t *testing.T) { - data := ` - - http://www.example.org/myfeed - My Video Feed - 2005-07-15T12:00:00Z - - - - http://www.example.org/entries/1 - Some Video - 2005-07-15T12:00:00Z - - Another title - - - Some description -A website: http://example.org/ - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description
A website: http://example.org/` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - expectedResults := []struct { - url string - mimeType string - size int64 - }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, - {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, - } - - for index, enclosure := range feed.Entries[0].Enclosures { - if expectedResults[index].url != enclosure.URL { - t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url) - } - - if expectedResults[index].mimeType != enclosure.MimeType { - t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType) - } - - if expectedResults[index].size != enclosure.Size { - t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size) - } - } -} - -func TestParseRepliesLinkRelation(t *testing.T) { - data := ` - - http://www.example.org/myfeed - My Example Feed - 2005-07-28T12:00:00Z - - James - - tag:entries.com,2005:1 - My original entry - 2006-03-01T12:12:12Z - - - - This is my original entry - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].CommentsURL != "http://www.example.org/comments.html" { - t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL) + version := getAtomFeedVersion(bytes.NewBufferString(data)) + if version != "0.3" { + t.Errorf(`Invalid Atom version detected: %s`, version) } } diff --git a/reader/parser/format_test.go b/reader/parser/format_test.go index 3795541..b2836b6 100644 --- a/reader/parser/format_test.go +++ b/reader/parser/format_test.go @@ -26,7 +26,7 @@ func TestDetectRSS(t *testing.T) { } } -func TestDetectAtom(t *testing.T) { +func TestDetectAtom10(t *testing.T) { data := `` format := DetectFeedFormat(data) @@ -35,6 +35,15 @@ func TestDetectAtom(t *testing.T) { } } +func TestDetectAtom03(t *testing.T) { + data := `` + format := DetectFeedFormat(data) + + if format != FormatAtom { + t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom) + } +} + func TestDetectAtomWithISOCharset(t *testing.T) { data := `` format := DetectFeedFormat(data) -- cgit v1.2.3