From d5838b67340ca83dcc32f6d1775c183188ec5e7a Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Mon, 20 Nov 2017 19:17:04 -0800 Subject: Move feed parsers packages in reader package --- reader/feed/atom/atom.go | 193 -------------- reader/feed/atom/parser.go | 29 --- reader/feed/atom/parser_test.go | 333 ------------------------ reader/feed/date/parser.go | 203 --------------- reader/feed/json/json.go | 171 ------------- reader/feed/json/parser.go | 24 -- reader/feed/json/parser_test.go | 359 -------------------------- reader/feed/parser.go | 8 +- reader/feed/rdf/parser.go | 28 -- reader/feed/rdf/parser_test.go | 307 ---------------------- reader/feed/rdf/rdf.go | 71 ------ reader/feed/rss/parser.go | 29 --- reader/feed/rss/parser_test.go | 550 ---------------------------------------- reader/feed/rss/rss.go | 235 ----------------- 14 files changed, 4 insertions(+), 2536 deletions(-) delete mode 100644 reader/feed/atom/atom.go delete mode 100644 reader/feed/atom/parser.go delete mode 100644 reader/feed/atom/parser_test.go delete mode 100644 reader/feed/date/parser.go delete mode 100644 reader/feed/json/json.go delete mode 100644 reader/feed/json/parser.go delete mode 100644 reader/feed/json/parser_test.go delete mode 100644 reader/feed/rdf/parser.go delete mode 100644 reader/feed/rdf/parser_test.go delete mode 100644 reader/feed/rdf/rdf.go delete mode 100644 reader/feed/rss/parser.go delete mode 100644 reader/feed/rss/parser_test.go delete mode 100644 reader/feed/rss/rss.go (limited to 'reader/feed') diff --git a/reader/feed/atom/atom.go b/reader/feed/atom/atom.go deleted file mode 100644 index 2035a4e..0000000 --- a/reader/feed/atom/atom.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. 
- -package atom - -import ( - "encoding/xml" - "log" - "strconv" - "strings" - "time" - - "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/model" - "github.com/miniflux/miniflux2/reader/feed/date" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" -) - -type atomFeed struct { - XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` - ID string `xml:"id"` - Title string `xml:"title"` - Author atomAuthor `xml:"author"` - Links []atomLink `xml:"link"` - Entries []atomEntry `xml:"entry"` -} - -type atomEntry struct { - ID string `xml:"id"` - Title string `xml:"title"` - Updated string `xml:"updated"` - Links []atomLink `xml:"link"` - Summary string `xml:"summary"` - Content atomContent `xml:"content"` - MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"` - Author atomAuthor `xml:"author"` -} - -type atomAuthor struct { - Name string `xml:"name"` - Email string `xml:"email"` -} - -type atomLink struct { - URL string `xml:"href,attr"` - Type string `xml:"type,attr"` - Rel string `xml:"rel,attr"` - Length string `xml:"length,attr"` -} - -type atomContent struct { - Type string `xml:"type,attr"` - Data string `xml:",chardata"` - XML string `xml:",innerxml"` -} - -type atomMediaGroup struct { - Description string `xml:"http://search.yahoo.com/mrss/ description"` -} - -func (a *atomFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.FeedURL = getRelationURL(a.Links, "self") - feed.SiteURL = getURL(a.Links) - feed.Title = sanitizer.StripTags(a.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, entry := range a.Entries { - item := entry.Transform() - if item.Author == "" { - item.Author = getAuthor(a.Author) - } - - feed.Entries = append(feed.Entries, item) - } - - return feed -} - -func (a *atomEntry) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = getURL(a.Links) - entry.Date = getDate(a) - entry.Author = sanitizer.StripTags(getAuthor(a.Author)) - entry.Hash = getHash(a) - entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a)) - entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t")) - entry.Enclosures = getEnclosures(a) - - if entry.Title == "" { - entry.Title = entry.URL - } - - return entry -} - -func getURL(links []atomLink) string { - for _, link := range links { - if strings.ToLower(link.Rel) == "alternate" { - return link.URL - } - - if link.Rel == "" && link.Type == "" { - return link.URL - } - } - - return "" -} - -func getRelationURL(links []atomLink, relation string) string { - for _, link := range links { - if strings.ToLower(link.Rel) == relation { - return link.URL - } - } - - return "" -} - -func getDate(a *atomEntry) time.Time { - if a.Updated != "" { - result, err := date.Parse(a.Updated) - if err != nil { - log.Println(err) - return time.Now() - } - - return result - } - - return time.Now() -} - -func getContent(a *atomEntry) string { - if a.Content.Type == "html" || a.Content.Type == "text" { - return a.Content.Data - } - - if a.Content.Type == "xhtml" { - return a.Content.XML - } - - if a.Summary != "" { - return a.Summary - } - - if a.MediaGroup.Description != "" { - return a.MediaGroup.Description - } - - return "" -} - -func getHash(a *atomEntry) string { - for _, value := range []string{a.ID, getURL(a.Links)} { - if value != "" { - return helper.Hash(value) - } - } - - return "" -} - -func getEnclosures(a *atomEntry) model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - - for _, 
link := range a.Links { - if strings.ToLower(link.Rel) == "enclosure" { - length, _ := strconv.Atoi(link.Length) - enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) - } - } - - return enclosures -} - -func getAuthor(author atomAuthor) string { - if author.Name != "" { - return author.Name - } - - if author.Email != "" { - return author.Email - } - - return "" -} diff --git a/reader/feed/atom/parser.go b/reader/feed/atom/parser.go deleted file mode 100644 index 04bfd04..0000000 --- a/reader/feed/atom/parser.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package atom - -import ( - "encoding/xml" - "io" - - "github.com/miniflux/miniflux2/errors" - "github.com/miniflux/miniflux2/model" - - "golang.org/x/net/html/charset" -) - -// Parse returns a normalized feed struct from a Atom feed. -func Parse(data io.Reader) (*model.Feed, error) { - atomFeed := new(atomFeed) - decoder := xml.NewDecoder(data) - decoder.CharsetReader = charset.NewReaderLabel - - err := decoder.Decode(atomFeed) - if err != nil { - return nil, errors.NewLocalizedError("Unable to parse Atom feed: %v.", err) - } - - return atomFeed.Transform(), nil -} diff --git a/reader/feed/atom/parser_test.go b/reader/feed/atom/parser_test.go deleted file mode 100644 index d5eea23..0000000 --- a/reader/feed/atom/parser_test.go +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package atom - -import ( - "bytes" - "testing" - "time" - - "github.com/miniflux/miniflux2/errors" -) - -func TestParseAtomSample(t *testing.T) { - data := ` - - - Example Feed - - 2003-12-13T18:30:02Z - - John Doe - - urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 - - - Atom-Powered Robots Run Amok - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "Example Feed" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } - - if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if feed.Entries[0].Content != "Some text." 
{ - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } - - if feed.Entries[0].Author != "John Doe" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseFeedWithoutTitle(t *testing.T) { - data := ` - - - - 2003-12-13T18:30:02Z - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "https://example.org/" { - t.Errorf("Incorrect feed title, got: %s", feed.Title) - } -} - -func TestParseEntryWithoutTitle(t *testing.T) { - data := ` - - - Example Feed - - 2003-12-13T18:30:02Z - - John Doe - - urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseFeedURL(t *testing.T) { - data := ` - - Example Feed - - - 2003-12-13T18:30:02Z - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.SiteURL != "https://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if feed.FeedURL != "https://example.org/feed" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } -} - -func TestParseEntryTitleWithWhitespaces(t *testing.T) { - data := ` - - Example Feed - - - - - Some Title - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Title != "Some Title" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseEntryWithAuthorName(t *testing.T) { - data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - Me - me@localhost - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Me" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithoutAuthorName(t *testing.T) { - data := ` - - Example Feed - - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - me@localhost - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "me@localhost" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithEnclosures(t *testing.T) { - data := ` - - http://www.example.org/myfeed - My Podcast Feed - 2005-07-15T12:00:00Z - - John Doe - - - - - http://www.example.org/entries/1 - Atom 1.0 - 2005-07-15T12:00:00Z - - An overview of Atom 1.0 - - - -
- <link href="http://www.example.org/entries/1"/>
- <link rel="enclosure" type="audio/mpeg" length="1234" href="http://www.example.org/myaudiofile.mp3"/>
- <link rel="enclosure" type="application/x-bittorrent" length="4567" href="http://www.example.org/myaudiofile.torrent"/>
- <content type="xhtml">
- <div xmlns="http://www.w3.org/1999/xhtml">
- <h1>Show Notes</h1>
- <ul>
- <li>00:01:00 -- Introduction</li>
- <li>00:15:00 -- Talking about Atom 1.0</li>
- <li>00:30:00 -- Wrapping up</li>
- </ul>
- </div>
- </content>
- </entry>
- </feed>
` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if len(feed.Entries[0].Enclosures) != 2 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { - t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) - } - - if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" { - t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType) - } - - if feed.Entries[0].Enclosures[0].Size != 1234 { - t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) - } - - if feed.Entries[0].Enclosures[1].URL != "http://www.example.org/myaudiofile.torrent" { - t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[1].URL) - } - - if feed.Entries[0].Enclosures[1].MimeType != "application/x-bittorrent" { - t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[1].MimeType) - } - - if feed.Entries[0].Enclosures[1].Size != 4567 { - t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size) - } -} - -func TestParseInvalidXml(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an error") - } - - if _, ok := err.(errors.LocalizedError); !ok { - t.Error("The error returned must be a LocalizedError") - } -} diff --git a/reader/feed/date/parser.go b/reader/feed/date/parser.go deleted file mode 100644 index e573ff8..0000000 --- a/reader/feed/date/parser.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. 
- -package date - -import ( - "fmt" - "strings" - "time" -) - -// DateFormats taken from github.com/mjibson/goread -var dateFormats = []string{ - time.RFC822, // RSS - time.RFC822Z, // RSS - time.RFC3339, // Atom - time.UnixDate, - time.RubyDate, - time.RFC850, - time.RFC1123Z, - time.RFC1123, - time.ANSIC, - "Mon, January 2 2006 15:04:05 -0700", - "Mon, January 02, 2006, 15:04:05 MST", - "Mon, January 02, 2006 15:04:05 MST", - "Mon, Jan 2, 2006 15:04 MST", - "Mon, Jan 2 2006 15:04 MST", - "Mon, Jan 2, 2006 15:04:05 MST", - "Mon, Jan 2 2006 15:04:05 -700", - "Mon, Jan 2 2006 15:04:05 -0700", - "Mon Jan 2 15:04 2006", - "Mon Jan 2 15:04:05 2006 MST", - "Mon Jan 02, 2006 3:04 pm", - "Mon, Jan 02,2006 15:04:05 MST", - "Mon Jan 02 2006 15:04:05 -0700", - "Monday, January 2, 2006 15:04:05 MST", - "Monday, January 2, 2006 03:04 PM", - "Monday, January 2, 2006", - "Monday, January 02, 2006", - "Monday, 2 January 2006 15:04:05 MST", - "Monday, 2 January 2006 15:04:05 -0700", - "Monday, 2 Jan 2006 15:04:05 MST", - "Monday, 2 Jan 2006 15:04:05 -0700", - "Monday, 02 January 2006 15:04:05 MST", - "Monday, 02 January 2006 15:04:05 -0700", - "Monday, 02 January 2006 15:04:05", - "Mon, 2 January 2006 15:04 MST", - "Mon, 2 January 2006, 15:04 -0700", - "Mon, 2 January 2006, 15:04:05 MST", - "Mon, 2 January 2006 15:04:05 MST", - "Mon, 2 January 2006 15:04:05 -0700", - "Mon, 2 January 2006", - "Mon, 2 Jan 2006 3:04:05 PM -0700", - "Mon, 2 Jan 2006 15:4:5 MST", - "Mon, 2 Jan 2006 15:4:5 -0700 GMT", - "Mon, 2, Jan 2006 15:4", - "Mon, 2 Jan 2006 15:04 MST", - "Mon, 2 Jan 2006, 15:04 -0700", - "Mon, 2 Jan 2006 15:04 -0700", - "Mon, 2 Jan 2006 15:04:05 UT", - "Mon, 2 Jan 2006 15:04:05MST", - "Mon, 2 Jan 2006 15:04:05 MST", - "Mon 2 Jan 2006 15:04:05 MST", - "mon,2 Jan 2006 15:04:05 MST", - "Mon, 2 Jan 2006 15:04:05 -0700 MST", - "Mon, 2 Jan 2006 15:04:05-0700", - "Mon, 2 Jan 2006 15:04:05 -0700", - "Mon, 2 Jan 2006 15:04:05", - "Mon, 2 Jan 2006 15:04", - "Mon,2 Jan 2006", - "Mon, 2 Jan 2006", - "Mon, 2 Jan 15:04:05 MST", - "Mon, 2 Jan 06 15:04:05 MST", - "Mon, 2 Jan 06 15:04:05 -0700", - "Mon, 2006-01-02 15:04", - "Mon,02 January 2006 14:04:05 MST", - "Mon, 02 January 2006", - "Mon, 02 Jan 2006 3:04:05 PM MST", - "Mon, 02 Jan 2006 15 -0700", - "Mon,02 Jan 2006 15:04 MST", - "Mon, 02 Jan 2006 15:04 MST", - "Mon, 02 Jan 2006 15:04 -0700", - "Mon, 02 Jan 2006 15:04:05 Z", - "Mon, 02 Jan 2006 15:04:05 UT", - "Mon, 02 Jan 2006 15:04:05 MST-07:00", - "Mon, 02 Jan 2006 15:04:05 MST -0700", - "Mon, 02 Jan 2006, 15:04:05 MST", - "Mon, 02 Jan 2006 15:04:05MST", - "Mon, 02 Jan 2006 15:04:05 MST", - "Mon , 02 Jan 2006 15:04:05 MST", - "Mon, 02 Jan 2006 15:04:05 GMT-0700", - "Mon,02 Jan 2006 15:04:05 -0700", - "Mon, 02 Jan 2006 15:04:05 -0700", - "Mon, 02 Jan 2006 15:04:05 -07:00", - "Mon, 02 Jan 2006 15:04:05 --0700", - "Mon 02 Jan 2006 15:04:05 -0700", - "Mon, 02 Jan 2006 15:04:05 -07", - "Mon, 02 Jan 2006 15:04:05 00", - "Mon, 02 Jan 2006 15:04:05", - "Mon, 02 Jan 2006", - "Mon, 02 Jan 06 15:04:05 MST", - "January 2, 2006 3:04 PM", - "January 2, 2006, 3:04 p.m.", - "January 2, 2006 15:04:05 MST", - "January 2, 2006 15:04:05", - "January 2, 2006 03:04 PM", - "January 2, 2006", - "January 02, 2006 15:04:05 MST", - "January 02, 2006 15:04", - "January 02, 2006 03:04 PM", - "January 02, 2006", - "Jan 2, 2006 3:04:05 PM MST", - "Jan 2, 2006 3:04:05 PM", - "Jan 2, 2006 15:04:05 MST", - "Jan 2, 2006", - "Jan 02 2006 03:04:05PM", - "Jan 02, 2006", - "6/1/2 15:04", - "6-1-2 15:04", - "2 January 2006 15:04:05 MST", - "2 January 
2006 15:04:05 -0700", - "2 January 2006", - "2 Jan 2006 15:04:05 Z", - "2 Jan 2006 15:04:05 MST", - "2 Jan 2006 15:04:05 -0700", - "2 Jan 2006", - "2.1.2006 15:04:05", - "2/1/2006", - "2-1-2006", - "2006 January 02", - "2006-1-2T15:04:05Z", - "2006-1-2 15:04:05", - "2006-1-2", - "2006-1-02T15:04:05Z", - "2006-01-02T15:04Z", - "2006-01-02T15:04-07:00", - "2006-01-02T15:04:05Z", - "2006-01-02T15:04:05-07:00:00", - "2006-01-02T15:04:05:-0700", - "2006-01-02T15:04:05-0700", - "2006-01-02T15:04:05-07:00", - "2006-01-02T15:04:05 -0700", - "2006-01-02T15:04:05:00", - "2006-01-02T15:04:05", - "2006-01-02 at 15:04:05", - "2006-01-02 15:04:05Z", - "2006-01-02 15:04:05 MST", - "2006-01-02 15:04:05-0700", - "2006-01-02 15:04:05-07:00", - "2006-01-02 15:04:05 -0700", - "2006-01-02 15:04", - "2006-01-02 00:00:00.0 15:04:05.0 -0700", - "2006/01/02", - "2006-01-02", - "15:04 02.01.2006 -0700", - "1/2/2006 3:04 PM MST", - "1/2/2006 3:04:05 PM MST", - "1/2/2006 3:04:05 PM", - "1/2/2006 15:04:05 MST", - "1/2/2006", - "06/1/2 15:04", - "06-1-2 15:04", - "02 Monday, Jan 2006 15:04", - "02 Jan 2006 15:04 MST", - "02 Jan 2006 15:04:05 UT", - "02 Jan 2006 15:04:05 MST", - "02 Jan 2006 15:04:05 -0700", - "02 Jan 2006 15:04:05", - "02 Jan 2006", - "02/01/2006 15:04 MST", - "02-01-2006 15:04:05 MST", - "02.01.2006 15:04:05", - "02/01/2006 15:04:05", - "02.01.2006 15:04", - "02/01/2006 - 15:04", - "02.01.2006 -0700", - "02/01/2006", - "02-01-2006", - "01/02/2006 3:04 PM", - "01/02/2006 15:04:05 MST", - "01/02/2006 - 15:04", - "01/02/2006", - "01-02-2006", -} - -// Parse parses a given date string using a large -// list of commonly found feed date formats. -func Parse(ds string) (t time.Time, err error) { - d := strings.TrimSpace(ds) - if d == "" { - return t, fmt.Errorf("Date string is empty") - } - - for _, f := range dateFormats { - if t, err = time.Parse(f, d); err == nil { - return - } - } - - err = fmt.Errorf("Failed to parse date: %s", ds) - return -} diff --git a/reader/feed/json/json.go b/reader/feed/json/json.go deleted file mode 100644 index e4473f4..0000000 --- a/reader/feed/json/json.go +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. 
- -package json - -import ( - "log" - "strings" - "time" - - "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/model" - "github.com/miniflux/miniflux2/reader/feed/date" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" -) - -type jsonFeed struct { - Version string `json:"version"` - Title string `json:"title"` - SiteURL string `json:"home_page_url"` - FeedURL string `json:"feed_url"` - Author jsonAuthor `json:"author"` - Items []jsonItem `json:"items"` -} - -type jsonAuthor struct { - Name string `json:"name"` - URL string `json:"url"` -} - -type jsonItem struct { - ID string `json:"id"` - URL string `json:"url"` - Title string `json:"title"` - Summary string `json:"summary"` - Text string `json:"content_text"` - HTML string `json:"content_html"` - DatePublished string `json:"date_published"` - DateModified string `json:"date_modified"` - Author jsonAuthor `json:"author"` - Attachments []jsonAttachment `json:"attachments"` -} - -type jsonAttachment struct { - URL string `json:"url"` - MimeType string `json:"mime_type"` - Title string `json:"title"` - Size int `json:"size_in_bytes"` - Duration int `json:"duration_in_seconds"` -} - -func (j *jsonFeed) GetAuthor() string { - return getAuthor(j.Author) -} - -func (j *jsonFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.FeedURL = j.FeedURL - feed.SiteURL = j.SiteURL - feed.Title = sanitizer.StripTags(j.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, item := range j.Items { - entry := item.Transform() - if entry.Author == "" { - entry.Author = j.GetAuthor() - } - - feed.Entries = append(feed.Entries, entry) - } - - return feed -} - -func (j *jsonItem) GetDate() time.Time { - for _, value := range []string{j.DatePublished, j.DateModified} { - if value != "" { - d, err := date.Parse(value) - if err != nil { - log.Println(err) - return time.Now() - } - - return d - } - } - - return time.Now() -} - -func (j *jsonItem) GetAuthor() string { - return getAuthor(j.Author) -} - -func (j *jsonItem) GetHash() string { - for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} { - if value != "" { - return helper.Hash(value) - } - } - - return "" -} - -func (j *jsonItem) GetTitle() string { - for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} { - if value != "" { - return truncate(value) - } - } - - return j.URL -} - -func (j *jsonItem) GetContent() string { - for _, value := range []string{j.HTML, j.Text, j.Summary} { - if value != "" { - return value - } - } - - return "" -} - -func (j *jsonItem) GetEnclosures() model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - - for _, attachment := range j.Attachments { - enclosures = append(enclosures, &model.Enclosure{ - URL: attachment.URL, - MimeType: attachment.MimeType, - Size: attachment.Size, - }) - } - - return enclosures -} - -func (j *jsonItem) Transform() *model.Entry { - entry := new(model.Entry) - entry.URL = j.URL - entry.Date = j.GetDate() - entry.Author = sanitizer.StripTags(j.GetAuthor()) - entry.Hash = j.GetHash() - entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent()) - entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t")) - entry.Enclosures = j.GetEnclosures() - return entry -} - -func getAuthor(author jsonAuthor) string { - if author.Name != "" { - return author.Name - } - - return "" -} - -func truncate(str string) string { - max := 100 - if len(str) > max { - return str[:max] + "..." 
- } - - return str -} diff --git a/reader/feed/json/parser.go b/reader/feed/json/parser.go deleted file mode 100644 index 18329e7..0000000 --- a/reader/feed/json/parser.go +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package json - -import ( - "encoding/json" - "io" - - "github.com/miniflux/miniflux2/errors" - "github.com/miniflux/miniflux2/model" -) - -// Parse returns a normalized feed struct from a JON feed. -func Parse(data io.Reader) (*model.Feed, error) { - feed := new(jsonFeed) - decoder := json.NewDecoder(data) - if err := decoder.Decode(&feed); err != nil { - return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err) - } - - return feed.Transform(), nil -} diff --git a/reader/feed/json/parser_test.go b/reader/feed/json/parser_test.go deleted file mode 100644 index ecb11a1..0000000 --- a/reader/feed/json/parser_test.go +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package json - -import ( - "bytes" - "strings" - "testing" - "time" - - "github.com/miniflux/miniflux2/errors" -) - -func TestParseJsonFeed(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "title": "My Example Feed", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "id": "2", - "content_text": "This is a second item.", - "url": "https://example.org/second-item" - }, - { - "id": "1", - "content_html": "

<p>Hello, world!</p>
", - "url": "https://example.org/initial-post" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "My Example Feed" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "https://example.org/feed.json" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "https://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 2 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "https://example.org/second-item" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Title != "This is a second item." { - t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) - } - - if feed.Entries[0].Content != "This is a second item." { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } - - if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash) - } - - if feed.Entries[1].URL != "https://example.org/initial-post" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL) - } - - if feed.Entries[1].Title != "Hello, world!" { - t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title) - } - - if feed.Entries[1].Content != "

<p>Hello, world!</p>
" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content) - } -} - -func TestParsePodcast(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json", - "title": "The Record", - "home_page_url": "http://therecord.co/", - "feed_url": "http://therecord.co/feed.json", - "items": [ - { - "id": "http://therecord.co/chris-parrish", - "title": "Special #1 - Chris Parrish", - "url": "http://therecord.co/chris-parrish", - "content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.", - "content_html": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.", - "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.", - "date_published": "2014-05-09T14:04:00-07:00", - "attachments": [ - { - "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a", - "mime_type": "audio/x-m4a", - "size_in_bytes": 89970236, - "duration_in_seconds": 6629 - } - ] - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "The Record" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "http://therecord.co/feed.json" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://therecord.co/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://therecord.co/chris-parrish" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Title != "Special #1 - Chris Parrish" { - t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) - } - - if feed.Entries[0].Content != `Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. 
He lives on Bainbridge Island, a quick ferry ride from Seattle.` { - t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content) - } - - location, _ := time.LoadLocation("America/Vancouver") - if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } - - if len(feed.Entries[0].Enclosures) != 1 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" { - t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) - } - - if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" { - t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType) - } - - if feed.Entries[0].Enclosures[0].Size != 89970236 { - t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) - } -} - -func TestParseAuthor(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json", - "title": "Brent Simmons’s Microblog", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "author": { - "name": "Brent Simmons", - "url": "http://example.org/", - "avatar": "https://example.org/avatar.png" - }, - "items": [ - { - "id": "2347259", - "url": "https://example.org/2347259", - "content_text": "Cats are neat. \n\nhttps://example.org/cats", - "date_published": "2016-02-09T14:22:00-07:00" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Author != "Brent Simmons" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseFeedWithoutTitle(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "id": "2347259", - "url": "https://example.org/2347259", - "content_text": "Cats are neat. \n\nhttps://example.org/cats", - "date_published": "2016-02-09T14:22:00-07:00" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "https://example.org/" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } -} - -func TestParseFeedItemWithInvalidDate(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "title": "My Example Feed", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "id": "2347259", - "url": "https://example.org/2347259", - "content_text": "Cats are neat. 
\n\nhttps://example.org/cats", - "date_published": "Tomorrow" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if !feed.Entries[0].Date.Before(time.Now()) { - t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date) - } -} - -func TestParseFeedItemWithoutID(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "title": "My Example Feed", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "content_text": "Some text." - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } -} - -func TestParseFeedItemWithoutTitle(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "title": "My Example Feed", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "url": "https://example.org/item" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Title != "https://example.org/item" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseTruncateItemTitle(t *testing.T) { - data := `{ - "version": "https://jsonfeed.org/version/1", - "title": "My Example Feed", - "home_page_url": "https://example.org/", - "feed_url": "https://example.org/feed.json", - "items": [ - { - "title": "` + strings.Repeat("a", 200) + `" - } - ] - }` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if len(feed.Entries[0].Title) != 103 { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseInvalidJSON(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an error") - } - - if _, ok := err.(errors.LocalizedError); !ok { - t.Error("The error returned must be a LocalizedError") - } -} diff --git a/reader/feed/parser.go b/reader/feed/parser.go index d94d72b..a28cf2d 100644 --- a/reader/feed/parser.go +++ b/reader/feed/parser.go @@ -14,10 +14,10 @@ import ( "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" - "github.com/miniflux/miniflux2/reader/feed/atom" - "github.com/miniflux/miniflux2/reader/feed/json" - "github.com/miniflux/miniflux2/reader/feed/rdf" - "github.com/miniflux/miniflux2/reader/feed/rss" + "github.com/miniflux/miniflux2/reader/atom" + "github.com/miniflux/miniflux2/reader/json" + "github.com/miniflux/miniflux2/reader/rdf" + "github.com/miniflux/miniflux2/reader/rss" "golang.org/x/net/html/charset" ) diff --git a/reader/feed/rdf/parser.go b/reader/feed/rdf/parser.go deleted file mode 100644 index f854a97..0000000 --- a/reader/feed/rdf/parser.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. 
-// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rdf - -import ( - "encoding/xml" - "io" - - "github.com/miniflux/miniflux2/errors" - "github.com/miniflux/miniflux2/model" - "golang.org/x/net/html/charset" -) - -// Parse returns a normalized feed struct from a RDF feed. -func Parse(data io.Reader) (*model.Feed, error) { - feed := new(rdfFeed) - decoder := xml.NewDecoder(data) - decoder.CharsetReader = charset.NewReaderLabel - - err := decoder.Decode(feed) - if err != nil { - return nil, errors.NewLocalizedError("Unable to parse RDF feed: %v.", err) - } - - return feed.Transform(), nil -} diff --git a/reader/feed/rdf/parser_test.go b/reader/feed/rdf/parser_test.go deleted file mode 100644 index dadca6f..0000000 --- a/reader/feed/rdf/parser_test.go +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rdf - -import ( - "bytes" - "strings" - "testing" - - "github.com/miniflux/miniflux2/errors" -) - -func TestParseRDFSample(t *testing.T) { - data := ` - - - - - - XML.com - http://xml.com/pub - - XML.com features a rich mix of information and services - for the XML community. - - - - - - - - - - - - - - - - - XML.com - http://www.xml.com - http://xml.com/universal/images/xml_tiny.gif - - - - Processing Inclusions with XSLT - http://xml.com/pub/2000/08/09/xslt/xslt.html - - Processing document inclusions with general XML tools can be - problematic. This article proposes a way of preserving inclusion - information through SAX-based processing. - - - - - Putting RDF to Work - http://xml.com/pub/2000/08/09/rdfdb/index.html - - Tool and API support for the Resource Description Framework - is slowly coming of age. Edd Dumbill takes a look at RDFDB, - one of the most exciting new RDF toolkits. - - - - - Search XML.com - Search XML.com's XML collection - s - http://search.xml.com - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "XML.com" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://xml.com/pub" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 2 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[1].Title != "Putting RDF to Work" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseRDFSampleWithDublinCore(t *testing.T) { - data := ` - - - - - Meerkat - http://meerkat.oreillynet.com - Meerkat: An Open Wire Service - The O'Reilly Network - Rael Dornfest (mailto:rael@oreilly.com) - Copyright © 2000 O'Reilly & Associates, Inc. - 2000-01-01T12:00+00:00 - hourly - 2 - 2000-01-01T12:00+00:00 - - - - - - - - - - - - - - - Meerkat Powered! 
- http://meerkat.oreillynet.com/icons/meerkat-powered.jpg - http://meerkat.oreillynet.com - - - - XML: A Disruptive Technology - http://c.moreover.com/click/here.pl?r123 - - XML is placing increasingly heavy loads on the existing technical - infrastructure of the Internet. - - The O'Reilly Network - Simon St.Laurent (mailto:simonstl@simonstl.com) - Copyright © 2000 O'Reilly & Associates, Inc. - XML - XML.com - NASDAQ - XML - - - - Search Meerkat - Search Meerkat's RSS Database... - s - http://meerkat.oreillynet.com/ - search - regex - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "Meerkat" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://meerkat.oreillynet.com" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Title != "XML: A Disruptive Technology" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } - - if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseItemWithOnlyFeedAuthor(t *testing.T) { - data := ` - - - - - Meerkat - http://meerkat.oreillynet.com - Rael Dornfest (mailto:rael@oreilly.com) - - - - XML: A Disruptive Technology - http://c.moreover.com/click/here.pl?r123 - - XML is placing increasingly heavy loads on the existing technical - infrastructure of the Internet. - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseItemWithoutLink(t *testing.T) { - data := ` - - - - - Meerkat - http://meerkat.oreillynet.com - - - - Title - Test - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://meerkat.oreillynet.com" { - t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL) - } -} - -func TestParseInvalidXml(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an error") - } - - if _, ok := err.(errors.LocalizedError); !ok { - t.Error("The error returned must be a LocalizedError") - } -} diff --git a/reader/feed/rdf/rdf.go b/reader/feed/rdf/rdf.go deleted file mode 100644 index 05281ca..0000000 --- a/reader/feed/rdf/rdf.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. 
-// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rdf - -import ( - "encoding/xml" - - "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" - - "github.com/miniflux/miniflux2/model" -) - -type rdfFeed struct { - XMLName xml.Name `xml:"RDF"` - Title string `xml:"channel>title"` - Link string `xml:"channel>link"` - Creator string `xml:"channel>creator"` - Items []rdfItem `xml:"item"` -} - -func (r *rdfFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.Title = sanitizer.StripTags(r.Title) - feed.SiteURL = r.Link - - for _, item := range r.Items { - entry := item.Transform() - - if entry.Author == "" && r.Creator != "" { - entry.Author = sanitizer.StripTags(r.Creator) - } - - if entry.URL == "" { - entry.URL = feed.SiteURL - } - - feed.Entries = append(feed.Entries, entry) - } - - return feed -} - -type rdfItem struct { - Title string `xml:"title"` - Link string `xml:"link"` - Description string `xml:"description"` - Creator string `xml:"creator"` -} - -func (r *rdfItem) Transform() *model.Entry { - entry := new(model.Entry) - entry.Title = sanitizer.StripTags(r.Title) - entry.Author = sanitizer.StripTags(r.Creator) - entry.URL = r.Link - entry.Content = processor.ItemContentProcessor(entry.URL, r.Description) - entry.Hash = getHash(r) - return entry -} - -func getHash(r *rdfItem) string { - value := r.Link - if value == "" { - value = r.Title + r.Description - } - - return helper.Hash(value) -} diff --git a/reader/feed/rss/parser.go b/reader/feed/rss/parser.go deleted file mode 100644 index a5b4434..0000000 --- a/reader/feed/rss/parser.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rss - -import ( - "encoding/xml" - "io" - - "github.com/miniflux/miniflux2/errors" - "github.com/miniflux/miniflux2/model" - - "golang.org/x/net/html/charset" -) - -// Parse returns a normalized feed struct from a RSS feed. -func Parse(data io.Reader) (*model.Feed, error) { - feed := new(rssFeed) - decoder := xml.NewDecoder(data) - decoder.CharsetReader = charset.NewReaderLabel - - err := decoder.Decode(feed) - if err != nil { - return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v.", err) - } - - return feed.Transform(), nil -} diff --git a/reader/feed/rss/parser_test.go b/reader/feed/rss/parser_test.go deleted file mode 100644 index 9f1a557..0000000 --- a/reader/feed/rss/parser_test.go +++ /dev/null @@ -1,550 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rss - -import ( - "bytes" - "testing" - "time" - - "github.com/miniflux/miniflux2/errors" -) - -func TestParseRss2Sample(t *testing.T) { - data := ` - - - - Liftoff News - http://liftoff.msfc.nasa.gov/ - Liftoff to Space Exploration. - en-us - Tue, 10 Jun 2003 04:00:00 GMT - Tue, 10 Jun 2003 09:41:01 GMT - http://blogs.law.harvard.edu/tech/rss - Weblog Editor 2.0 - editor@example.com - webmaster@example.com - - Star City - http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp - How do Americans get ready to work with Russians aboard the International Space Station? 
They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. - Tue, 03 Jun 2003 09:39:21 GMT - http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 - - - Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. - Fri, 30 May 2003 11:06:42 GMT - http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 - - - The Engine That Does More - http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp - Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. - Tue, 27 May 2003 08:37:32 GMT - http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 - - - Astronauts' Dirty Laundry - http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp - Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. - Tue, 20 May 2003 08:56:02 GMT - http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "Liftoff News" { - t.Errorf("Incorrect title, got: %s", feed.Title) - } - - if feed.FeedURL != "" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } - - if len(feed.Entries) != 4 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC) - if !feed.Entries[0].Date.Equal(expectedDate) { - t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate) - } - - if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } - - if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Title != "Star City" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } - - if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? 
They take a crash course in culture, language and protocol at Russia's Star City.` { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseFeedWithoutTitle(t *testing.T) { - data := ` - - - https://example.org/ - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Title != "https://example.org/" { - t.Errorf("Incorrect feed title, got: %s", feed.Title) - } -} - -func TestParseEntryWithoutTitle(t *testing.T) { - data := ` - - - https://example.org/ - - https://example.org/item - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Title != "https://example.org/item" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseEntryWithoutLink(t *testing.T) { - data := ` - - - https://example.org/ - - 1234 - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].URL != "https://example.org/" { - t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" { - t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) - } -} - -func TestParseEntryWithAtomLink(t *testing.T) { - data := ` - - - https://example.org/ - - Test - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].URL != "https://example.org/item" { - t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL) - } -} - -func TestParseEntryWithMultipleAtomLinks(t *testing.T) { - data := ` - - - https://example.org/ - - Test - - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].URL != "https://example.org/b" { - t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL) - } -} - -func TestParseFeedURLWithAtomLink(t *testing.T) { - data := ` - - - Example - https://example.org/ - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.FeedURL != "https://example.org/rss" { - t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) - } - - if feed.SiteURL != "https://example.org/" { - t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) - } -} - -func TestParseEntryWithAtomAuthor(t *testing.T) { - data := ` - - - Example - https://example.org/ - - - Test - https://example.org/item - - Foo Bar - Vice President - - FooBar Inc. 
- - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Foo Bar" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithDublinCoreAuthor(t *testing.T) { - data := ` - - - Example - https://example.org/ - - Test - https://example.org/item - Me (me@example.com) - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Me (me@example.com)" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithItunesAuthor(t *testing.T) { - data := ` - - - Example - https://example.org/ - - Test - https://example.org/item - Someone - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Someone" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseFeedWithItunesAuthor(t *testing.T) { - data := ` - - - Example - https://example.org/ - Someone - - Test - https://example.org/item - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Author != "Someone" { - t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) - } -} - -func TestParseEntryWithDublinCoreDate(t *testing.T) { - data := ` - - - Example - http://example.org/ - - Item 1 - http://example.org/item1 - Description. - UUID - 2002-09-29T23:40:06-05:00 - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - location, _ := time.LoadLocation("EST") - expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location) - if !feed.Entries[0].Date.Equal(expectedDate) { - t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate) - } -} - -func TestParseEntryWithContentEncoded(t *testing.T) { - data := ` - - - Example - http://example.org/ - - Item 1 - http://example.org/item1 - Description. - UUID - Example.

]]></content:encoded>
- </item>
- </channel>
- </rss>
` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Content != `

Example.

` { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntryWithFeedBurnerLink(t *testing.T) { - data := ` - - - Example - http://example.org/ - - Item 1 - http://example.org/item1 - http://example.org/original - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].URL != "http://example.org/original" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL) - } -} - -func TestParseEntryTitleWithWhitespaces(t *testing.T) { - data := ` - - - Example - http://example.org - - - Some Title - - http://www.example.org/entries/1 - Fri, 15 Jul 2005 00:00:00 -0500 - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if feed.Entries[0].Title != "Some Title" { - t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) - } -} - -func TestParseEntryWithEnclosures(t *testing.T) { - data := ` - - - My Podcast Feed - http://example.org - some.email@example.org - - Podcasting with RSS - http://www.example.org/entries/1 - An overview of RSS podcasting - Fri, 15 Jul 2005 00:00:00 -0500 - http://www.example.org/entries/1 - - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if len(feed.Entries[0].Enclosures) != 1 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { - t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) - } - - if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" { - t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType) - } - - if feed.Entries[0].Enclosures[0].Size != 12345 { - t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) - } -} - -func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) { - data := ` - - - My Example Feed - http://example.org - some.email@example.org - - Example Item - http://www.example.org/entries/1 - - http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3 - - - ` - - feed, err := Parse(bytes.NewBufferString(data)) - if err != nil { - t.Error(err) - } - - if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if len(feed.Entries[0].Enclosures) != 1 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) - } - - if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" { - t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) - } - - if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" { - t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType) - } - - if feed.Entries[0].Enclosures[0].Size != 76192460 { - t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) - } -} - -func TestParseInvalidXml(t *testing.T) { - data := `garbage` - _, err := Parse(bytes.NewBufferString(data)) - if err == nil { - t.Error("Parse should returns an 
error") - } - - if _, ok := err.(errors.LocalizedError); !ok { - t.Error("The error returned must be a LocalizedError") - } -} diff --git a/reader/feed/rss/rss.go b/reader/feed/rss/rss.go deleted file mode 100644 index d685a3e..0000000 --- a/reader/feed/rss/rss.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2017 Frédéric Guillot. All rights reserved. -// Use of this source code is governed by the Apache 2.0 -// license that can be found in the LICENSE file. - -package rss - -import ( - "encoding/xml" - "log" - "path" - "strconv" - "strings" - "time" - - "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/model" - "github.com/miniflux/miniflux2/reader/feed/date" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" -) - -type rssFeed struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - Title string `xml:"channel>title"` - Links []rssLink `xml:"channel>link"` - Language string `xml:"channel>language"` - Description string `xml:"channel>description"` - PubDate string `xml:"channel>pubDate"` - ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"` - Items []rssItem `xml:"channel>item"` -} - -type rssLink struct { - XMLName xml.Name - Data string `xml:",chardata"` - Href string `xml:"href,attr"` - Rel string `xml:"rel,attr"` -} - -type rssItem struct { - GUID string `xml:"guid"` - Title string `xml:"title"` - Links []rssLink `xml:"link"` - OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"` - Description string `xml:"description"` - Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` - PubDate string `xml:"pubDate"` - Date string `xml:"http://purl.org/dc/elements/1.1/ date"` - Authors []rssAuthor `xml:"author"` - Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"` - Enclosures []rssEnclosure `xml:"enclosure"` - OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"` -} - -type rssAuthor struct { - XMLName xml.Name - Data string `xml:",chardata"` - Name string `xml:"name"` -} - -type rssEnclosure struct { - URL string `xml:"url,attr"` - Type string `xml:"type,attr"` - Length string `xml:"length,attr"` -} - -func (r *rssFeed) GetSiteURL() string { - for _, element := range r.Links { - if element.XMLName.Space == "" { - return element.Data - } - } - - return "" -} - -func (r *rssFeed) GetFeedURL() string { - for _, element := range r.Links { - if element.XMLName.Space == "http://www.w3.org/2005/Atom" { - return element.Href - } - } - - return "" -} - -func (r *rssFeed) Transform() *model.Feed { - feed := new(model.Feed) - feed.SiteURL = r.GetSiteURL() - feed.FeedURL = r.GetFeedURL() - feed.Title = sanitizer.StripTags(r.Title) - - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, item := range r.Items { - entry := item.Transform() - - if entry.Author == "" && r.ItunesAuthor != "" { - entry.Author = r.ItunesAuthor - } - entry.Author = sanitizer.StripTags(entry.Author) - - if entry.URL == "" { - entry.URL = feed.SiteURL - } - - feed.Entries = append(feed.Entries, entry) - } - - return feed -} -func (r *rssItem) GetDate() time.Time { - value := r.PubDate - if r.Date != "" { - value = r.Date - } - - if value != "" { - result, err := date.Parse(value) - if err != nil { - log.Println(err) - return time.Now() - } - - return result - } - - return time.Now() -} - -func (r *rssItem) GetAuthor() string { - for _, element := range r.Authors { - if element.Name != "" { - 
			return element.Name
-		}
-
-		if element.Data != "" {
-			return element.Data
-		}
-	}
-
-	return r.Creator
-}
-
-func (r *rssItem) GetHash() string {
-	for _, value := range []string{r.GUID, r.GetURL()} {
-		if value != "" {
-			return helper.Hash(value)
-		}
-	}
-
-	return ""
-}
-
-func (r *rssItem) GetContent() string {
-	if r.Content != "" {
-		return r.Content
-	}
-
-	return r.Description
-}
-
-func (r *rssItem) GetURL() string {
-	if r.OriginalLink != "" {
-		return r.OriginalLink
-	}
-
-	for _, link := range r.Links {
-		if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
-			return link.Href
-		}
-
-		if link.Data != "" {
-			return link.Data
-		}
-	}
-
-	return ""
-}
-
-func (r *rssItem) GetEnclosures() model.EnclosureList {
-	enclosures := make(model.EnclosureList, 0)
-
-	for _, enclosure := range r.Enclosures {
-		length, _ := strconv.Atoi(enclosure.Length)
-		enclosureURL := enclosure.URL
-
-		if r.OrigEnclosureLink != "" {
-			filename := path.Base(r.OrigEnclosureLink)
-			if strings.Contains(enclosureURL, filename) {
-				enclosureURL = r.OrigEnclosureLink
-			}
-		}
-
-		enclosures = append(enclosures, &model.Enclosure{
-			URL:      enclosureURL,
-			MimeType: enclosure.Type,
-			Size:     length,
-		})
-	}
-
-	return enclosures
-}
-
-func (r *rssItem) Transform() *model.Entry {
-	entry := new(model.Entry)
-	entry.URL = r.GetURL()
-	entry.Date = r.GetDate()
-	entry.Author = r.GetAuthor()
-	entry.Hash = r.GetHash()
-	entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
-	entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t"))
-	entry.Enclosures = r.GetEnclosures()
-
-	if entry.Title == "" {
-		entry.Title = entry.URL
-	}
-
-	return entry
-}
-
-func isValidLinkRelation(rel string) bool {
-	switch rel {
-	case "", "alternate", "enclosure", "related", "self", "via":
-		return true
-	default:
-		if strings.HasPrefix(rel, "http") {
-			return true
-		}
-		return false
-	}
-}
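
A minimal usage sketch of one relocated package, assuming the new import path github.com/miniflux/miniflux2/reader/atom shown in the reader/feed/parser.go hunk above and the Parse(io.Reader) (*model.Feed, error) signature from the deleted atom/parser.go. The sample document and the main wrapper are illustrative only, not part of this commit.

// Sketch: parse an Atom document with the package at its new location
// under reader/. Import path and Parse signature are taken from the
// diff above; the sample feed below is illustrative.
package main

import (
	"fmt"
	"strings"

	"github.com/miniflux/miniflux2/reader/atom"
)

func main() {
	data := `<feed xmlns="http://www.w3.org/2005/Atom">
	<title>Example Feed</title>
	<link href="http://example.org/"/>
	<entry>
		<title>Atom-Powered Robots Run Amok</title>
		<link href="http://example.org/2003/12/13/atom03"/>
		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
		<updated>2003-12-13T18:30:02Z</updated>
		<summary>Some text.</summary>
	</entry>
</feed>`

	feed, err := atom.Parse(strings.NewReader(data))
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}

	fmt.Printf("%s: %d entries\n", feed.Title, len(feed.Entries))
}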