aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/feed
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-11-19 21:10:04 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-11-19 22:01:46 -0800
commit8ffb773f43c8dc54801ca1d111854e7e881c93c9 (patch)
tree38133a2fc612597a75fed1d13e5b4042f58a2b7e /reader/feed
First commit
Diffstat (limited to 'reader/feed')
-rw-r--r--reader/feed/atom/atom.go214
-rw-r--r--reader/feed/atom/parser.go28
-rw-r--r--reader/feed/atom/parser_test.go319
-rw-r--r--reader/feed/date/parser.go203
-rw-r--r--reader/feed/handler.go152
-rw-r--r--reader/feed/json/json.go170
-rw-r--r--reader/feed/json/parser.go23
-rw-r--r--reader/feed/json/parser_test.go345
-rw-r--r--reader/feed/parser.go82
-rw-r--r--reader/feed/parser_test.go169
-rw-r--r--reader/feed/rss/parser.go28
-rw-r--r--reader/feed/rss/parser_test.go466
-rw-r--r--reader/feed/rss/rss.go207
13 files changed, 2406 insertions, 0 deletions
diff --git a/reader/feed/atom/atom.go b/reader/feed/atom/atom.go
new file mode 100644
index 0000000..db6172f
--- /dev/null
+++ b/reader/feed/atom/atom.go
@@ -0,0 +1,214 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+ "encoding/xml"
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/feed/date"
+ "github.com/miniflux/miniflux2/reader/processor"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+ "log"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// AtomFeed maps the root <feed> element of a document in the
+// http://www.w3.org/2005/Atom namespace for decoding with encoding/xml.
+// Transform converts it into the normalized model.Feed.
+type AtomFeed struct {
+ XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
+ ID string `xml:"id"`
+ Title string `xml:"title"`
+ Author Author `xml:"author"`
+ Links []Link `xml:"link"`
+ Entries []AtomEntry `xml:"entry"`
+}
+
+// AtomEntry maps one <entry> element of an Atom feed.
+// Updated holds the raw date string (parsed by GetDate) and MediaGroup is
+// read from the http://search.yahoo.com/mrss/ namespace.
+type AtomEntry struct {
+ ID string `xml:"id"`
+ Title string `xml:"title"`
+ Updated string `xml:"updated"`
+ Links []Link `xml:"link"`
+ Summary string `xml:"summary"`
+ Content Content `xml:"content"`
+ MediaGroup MediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
+ Author Author `xml:"author"`
+}
+
+// Author maps an <author> element; getAuthor prefers Name over Email.
+type Author struct {
+ Name string `xml:"name"`
+ Email string `xml:"email"`
+}
+
+// Link maps the attributes of a <link> element.
+// Length is kept as a string and converted to an integer by GetEnclosures.
+type Link struct {
+ Url string `xml:"href,attr"`
+ Type string `xml:"type,attr"`
+ Rel string `xml:"rel,attr"`
+ Length string `xml:"length,attr"`
+}
+
+// Content maps a <content> element: its type attribute, its character
+// data and its raw inner XML. GetContent picks Data or Xml based on Type.
+type Content struct {
+ Type string `xml:"type,attr"`
+ Data string `xml:",chardata"`
+ Xml string `xml:",innerxml"`
+}
+
+// MediaGroup maps the description element of a group in the
+// http://search.yahoo.com/mrss/ namespace; GetContent uses it as a last resort.
+type MediaGroup struct {
+ Description string `xml:"http://search.yahoo.com/mrss/ description"`
+}
+
+// getSiteURL returns the href of the first feed-level link that is either
+// marked rel="alternate" (case-insensitive) or carries neither a rel nor a
+// type attribute. It returns an empty string when no link qualifies.
+func (a *AtomFeed) getSiteURL() string {
+	for idx := range a.Links {
+		candidate := a.Links[idx]
+		isAlternate := strings.ToLower(candidate.Rel) == "alternate"
+		isBare := candidate.Rel == "" && candidate.Type == ""
+		if isAlternate || isBare {
+			return candidate.Url
+		}
+	}
+
+	return ""
+}
+
+// getFeedURL returns the href of the first feed-level link whose rel is
+// "self" (case-insensitive), or an empty string when there is none.
+func (a *AtomFeed) getFeedURL() string {
+	for idx := 0; idx < len(a.Links); idx++ {
+		if rel := strings.ToLower(a.Links[idx].Rel); rel == "self" {
+			return a.Links[idx].Url
+		}
+	}
+
+	return ""
+}
+
+// Transform converts the decoded Atom document into a normalized feed.
+// The title falls back to the site URL when empty, and entries without an
+// author inherit the feed-level author.
+func (a *AtomFeed) Transform() *model.Feed {
+	result := &model.Feed{}
+	result.FeedURL = a.getFeedURL()
+	result.SiteURL = a.getSiteURL()
+
+	if result.Title = sanitizer.StripTags(a.Title); result.Title == "" {
+		result.Title = result.SiteURL
+	}
+
+	for idx := range a.Entries {
+		converted := a.Entries[idx].Transform()
+		if converted.Author == "" {
+			converted.Author = a.GetAuthor()
+		}
+		result.Entries = append(result.Entries, converted)
+	}
+
+	return result
+}
+
+// GetAuthor returns the feed-level author as resolved by getAuthor.
+func (a *AtomFeed) GetAuthor() string {
+	feedAuthor := a.Author
+	return getAuthor(feedAuthor)
+}
+
+// GetDate parses the <updated> value of the entry.
+// The current time is returned when the value is missing or cannot be
+// parsed; a parse failure is logged.
+func (e *AtomEntry) GetDate() time.Time {
+	if e.Updated == "" {
+		return time.Now()
+	}
+
+	parsed, err := date.Parse(e.Updated)
+	if err == nil {
+		return parsed
+	}
+
+	log.Println(err)
+	return time.Now()
+}
+
+// GetURL returns the href of the first entry link that is either marked
+// rel="alternate" (case-insensitive) or has neither rel nor type set.
+// It returns an empty string when no link qualifies.
+func (e *AtomEntry) GetURL() string {
+	for idx := range e.Links {
+		candidate := e.Links[idx]
+		switch {
+		case strings.ToLower(candidate.Rel) == "alternate":
+			return candidate.Url
+		case candidate.Rel == "" && candidate.Type == "":
+			return candidate.Url
+		}
+	}
+
+	return ""
+}
+
+// GetAuthor returns the entry-level author as resolved by getAuthor.
+func (e *AtomEntry) GetAuthor() string {
+	entryAuthor := e.Author
+	return getAuthor(entryAuthor)
+}
+
+// GetHash returns the hash of the entry ID, or of the entry URL when the
+// ID is empty. It returns an empty string when both are empty.
+func (e *AtomEntry) GetHash() string {
+	if e.ID != "" {
+		return helper.Hash(e.ID)
+	}
+
+	if link := e.GetURL(); link != "" {
+		return helper.Hash(link)
+	}
+
+	return ""
+}
+
+// GetContent returns the body of the entry.
+//
+// The <content> element is used first: its character data for the "html"
+// and "text" types, its raw inner XML for "xhtml". A missing type attribute
+// is treated as "text", as RFC 4287 section 4.1.3.1 requires. When
+// <content> is absent, blank or of another type, the summary is used, then
+// the Media RSS group description. An empty string is returned when none
+// of them is set.
+func (e *AtomEntry) GetContent() string {
+	switch e.Content.Type {
+	case "", "html", "text":
+		// Whitespace-only character data (an absent or empty element, or
+		// untyped markup) must not hide the summary.
+		if strings.TrimSpace(e.Content.Data) != "" {
+			return e.Content.Data
+		}
+	case "xhtml":
+		if strings.TrimSpace(e.Content.Xml) != "" {
+			return e.Content.Xml
+		}
+	}
+
+	if e.Summary != "" {
+		return e.Summary
+	}
+
+	return e.MediaGroup.Description
+}
+
+// GetEnclosures builds one enclosure per link marked rel="enclosure"
+// (case-insensitive). The length attribute is converted with strconv.Atoi;
+// a value that does not convert yields a size of 0. The returned list is
+// never nil.
+func (e *AtomEntry) GetEnclosures() model.EnclosureList {
+	list := model.EnclosureList{}
+
+	for idx := range e.Links {
+		current := e.Links[idx]
+		if strings.ToLower(current.Rel) != "enclosure" {
+			continue
+		}
+
+		// Best effort: the conversion error is deliberately ignored.
+		size, _ := strconv.Atoi(current.Length)
+		attachment := &model.Enclosure{URL: current.Url, MimeType: current.Type, Size: size}
+		list = append(list, attachment)
+	}
+
+	return list
+}
+
+// Transform converts the Atom entry into a normalized entry.
+// The title is trimmed and stripped of tags, and falls back to the entry
+// URL when it ends up empty.
+func (e *AtomEntry) Transform() *model.Entry {
+	link := e.GetURL()
+
+	out := &model.Entry{URL: link}
+	out.Date = e.GetDate()
+	out.Author = sanitizer.StripTags(e.GetAuthor())
+	out.Hash = e.GetHash()
+	out.Content = processor.ItemContentProcessor(link, e.GetContent())
+
+	title := sanitizer.StripTags(strings.Trim(e.Title, " \n\t"))
+	out.Enclosures = e.GetEnclosures()
+	if title == "" {
+		title = link
+	}
+	out.Title = title
+
+	return out
+}
+
+// getAuthor returns the author's name, or the email address when the name
+// is empty. It returns an empty string when both are empty.
+func getAuthor(author Author) string {
+	switch {
+	case author.Name != "":
+		return author.Name
+	case author.Email != "":
+		return author.Email
+	default:
+		return ""
+	}
+}
diff --git a/reader/feed/atom/parser.go b/reader/feed/atom/parser.go
new file mode 100644
index 0000000..0b8f0d7
--- /dev/null
+++ b/reader/feed/atom/parser.go
@@ -0,0 +1,28 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+ "encoding/xml"
+ "fmt"
+ "github.com/miniflux/miniflux2/model"
+ "io"
+
+ "golang.org/x/net/html/charset"
+)
+
+// Parse decodes an Atom document from data and returns the normalized feed.
+// Non-UTF-8 documents are handled through charset.NewReaderLabel. An error
+// is returned when the XML cannot be decoded into an AtomFeed.
+func Parse(data io.Reader) (*model.Feed, error) {
+	atomFeed := new(AtomFeed)
+	decoder := xml.NewDecoder(data)
+	decoder.CharsetReader = charset.NewReaderLabel
+
+	if err := decoder.Decode(atomFeed); err != nil {
+		// No trailing newline: error strings are composed and logged by callers.
+		return nil, fmt.Errorf("Unable to parse Atom feed: %v", err)
+	}
+
+	return atomFeed.Transform(), nil
+}
diff --git a/reader/feed/atom/parser_test.go b/reader/feed/atom/parser_test.go
new file mode 100644
index 0000000..39d9420
--- /dev/null
+++ b/reader/feed/atom/parser_test.go
@@ -0,0 +1,319 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+ "bytes"
+ "testing"
+ "time"
+)
+
+// TestParseAtomSample checks every normalized field of a minimal Atom feed.
+func TestParseAtomSample(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <author>
+ <name>John Doe</name>
+ </author>
+ <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+
+ <entry>
+ <title>Atom-Powered Robots Run Amok</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "http://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertions below index Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+
+	if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != "Some text." {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+
+	if feed.Entries[0].Author != "John Doe" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseFeedWithoutTitle checks that the site URL is used as feed title
+// when the feed has no <title>.
+func TestParseFeedWithoutTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="https://example.org/"/>
+ <link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "https://example.org/" {
+		t.Errorf("Incorrect feed title, got: %s", feed.Title)
+	}
+}
+
+// TestParseEntryWithoutTitle checks that the entry URL is used as title
+// when the entry has no <title>.
+func TestParseEntryWithoutTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <author>
+ <name>John Doe</name>
+ </author>
+ <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+
+ <entry>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseFeedURL checks that the alternate link becomes the site URL and
+// the self link becomes the feed URL.
+func TestParseFeedURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link rel="alternate" type="text/html" href="https://example.org/"/>
+ <link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.SiteURL != "https://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if feed.FeedURL != "https://example.org/feed" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+}
+
+// TestParseEntryTitleWithWhitespaces checks that surrounding whitespace is
+// trimmed from the entry title.
+func TestParseEntryTitleWithWhitespaces(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title>
+ Some Title
+ </title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "Some Title" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryWithAuthorName checks that the author name wins over the
+// email address when both are present.
+func TestParseEntryWithAuthorName(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ <author>
+ <name>Me</name>
+ <email>me@localhost</email>
+ </author>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Me" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseEntryWithoutAuthorName checks that the email address is used as
+// author when the name element is empty.
+func TestParseEntryWithoutAuthorName(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ <author>
+ <name/>
+ <email>me@localhost</email>
+ </author>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "me@localhost" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseEntryWithEnclosures checks that rel="enclosure" links are turned
+// into enclosures with their URL, MIME type and size.
+func TestParseEntryWithEnclosures(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <id>http://www.example.org/myfeed</id>
+ <title>My Podcast Feed</title>
+ <updated>2005-07-15T12:00:00Z</updated>
+ <author>
+ <name>John Doe</name>
+ </author>
+ <link href="http://example.org" />
+ <link rel="self" href="http://example.org/myfeed" />
+ <entry>
+ <id>http://www.example.org/entries/1</id>
+ <title>Atom 1.0</title>
+ <updated>2005-07-15T12:00:00Z</updated>
+ <link href="http://www.example.org/entries/1" />
+ <summary>An overview of Atom 1.0</summary>
+ <link rel="enclosure"
+ type="audio/mpeg"
+ title="MP3"
+ href="http://www.example.org/myaudiofile.mp3"
+ length="1234" />
+ <link rel="enclosure"
+ type="application/x-bittorrent"
+ title="BitTorrent"
+ href="http://www.example.org/myaudiofile.torrent"
+ length="4567" />
+ <content type="xhtml">
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ <h1>Show Notes</h1>
+ <ul>
+ <li>00:01:00 -- Introduction</li>
+ <li>00:15:00 -- Talking about Atom 1.0</li>
+ <li>00:30:00 -- Wrapping up</li>
+ </ul>
+ </div>
+ </content>
+ </entry>
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertions below index Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 2 {
+		// The assertions below index Enclosures[0] and Enclosures[1].
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 1234 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+
+	if feed.Entries[0].Enclosures[1].URL != "http://www.example.org/myaudiofile.torrent" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[1].URL)
+	}
+
+	if feed.Entries[0].Enclosures[1].MimeType != "application/x-bittorrent" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[1].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[1].Size != 4567 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size)
+	}
+}
diff --git a/reader/feed/date/parser.go b/reader/feed/date/parser.go
new file mode 100644
index 0000000..e573ff8
--- /dev/null
+++ b/reader/feed/date/parser.go
@@ -0,0 +1,203 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package date
+
+import (
+ "fmt"
+ "strings"
+ "time"
+)
+
+// dateFormats lists the layouts that Parse tries, in order.
+// The list is taken from github.com/mjibson/goread.
+var dateFormats = []string{
+ time.RFC822, // RSS
+ time.RFC822Z, // RSS
+ time.RFC3339, // Atom
+ time.UnixDate,
+ time.RubyDate,
+ time.RFC850,
+ time.RFC1123Z,
+ time.RFC1123,
+ time.ANSIC,
+ "Mon, January 2 2006 15:04:05 -0700",
+ "Mon, January 02, 2006, 15:04:05 MST",
+ "Mon, January 02, 2006 15:04:05 MST",
+ "Mon, Jan 2, 2006 15:04 MST",
+ "Mon, Jan 2 2006 15:04 MST",
+ "Mon, Jan 2, 2006 15:04:05 MST",
+ "Mon, Jan 2 2006 15:04:05 -700",
+ "Mon, Jan 2 2006 15:04:05 -0700",
+ "Mon Jan 2 15:04 2006",
+ "Mon Jan 2 15:04:05 2006 MST",
+ "Mon Jan 02, 2006 3:04 pm",
+ "Mon, Jan 02,2006 15:04:05 MST",
+ "Mon Jan 02 2006 15:04:05 -0700",
+ "Monday, January 2, 2006 15:04:05 MST",
+ "Monday, January 2, 2006 03:04 PM",
+ "Monday, January 2, 2006",
+ "Monday, January 02, 2006",
+ "Monday, 2 January 2006 15:04:05 MST",
+ "Monday, 2 January 2006 15:04:05 -0700",
+ "Monday, 2 Jan 2006 15:04:05 MST",
+ "Monday, 2 Jan 2006 15:04:05 -0700",
+ "Monday, 02 January 2006 15:04:05 MST",
+ "Monday, 02 January 2006 15:04:05 -0700",
+ "Monday, 02 January 2006 15:04:05",
+ "Mon, 2 January 2006 15:04 MST",
+ "Mon, 2 January 2006, 15:04 -0700",
+ "Mon, 2 January 2006, 15:04:05 MST",
+ "Mon, 2 January 2006 15:04:05 MST",
+ "Mon, 2 January 2006 15:04:05 -0700",
+ "Mon, 2 January 2006",
+ "Mon, 2 Jan 2006 3:04:05 PM -0700",
+ "Mon, 2 Jan 2006 15:4:5 MST",
+ "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
+ "Mon, 2, Jan 2006 15:4",
+ "Mon, 2 Jan 2006 15:04 MST",
+ "Mon, 2 Jan 2006, 15:04 -0700",
+ "Mon, 2 Jan 2006 15:04 -0700",
+ "Mon, 2 Jan 2006 15:04:05 UT",
+ "Mon, 2 Jan 2006 15:04:05MST",
+ "Mon, 2 Jan 2006 15:04:05 MST",
+ "Mon 2 Jan 2006 15:04:05 MST",
+ "mon,2 Jan 2006 15:04:05 MST",
+ "Mon, 2 Jan 2006 15:04:05 -0700 MST",
+ "Mon, 2 Jan 2006 15:04:05-0700",
+ "Mon, 2 Jan 2006 15:04:05 -0700",
+ "Mon, 2 Jan 2006 15:04:05",
+ "Mon, 2 Jan 2006 15:04",
+ "Mon,2 Jan 2006",
+ "Mon, 2 Jan 2006",
+ "Mon, 2 Jan 15:04:05 MST",
+ "Mon, 2 Jan 06 15:04:05 MST",
+ "Mon, 2 Jan 06 15:04:05 -0700",
+ "Mon, 2006-01-02 15:04",
+ "Mon,02 January 2006 14:04:05 MST",
+ "Mon, 02 January 2006",
+ "Mon, 02 Jan 2006 3:04:05 PM MST",
+ "Mon, 02 Jan 2006 15 -0700",
+ "Mon,02 Jan 2006 15:04 MST",
+ "Mon, 02 Jan 2006 15:04 MST",
+ "Mon, 02 Jan 2006 15:04 -0700",
+ "Mon, 02 Jan 2006 15:04:05 Z",
+ "Mon, 02 Jan 2006 15:04:05 UT",
+ "Mon, 02 Jan 2006 15:04:05 MST-07:00",
+ "Mon, 02 Jan 2006 15:04:05 MST -0700",
+ "Mon, 02 Jan 2006, 15:04:05 MST",
+ "Mon, 02 Jan 2006 15:04:05MST",
+ "Mon, 02 Jan 2006 15:04:05 MST",
+ "Mon , 02 Jan 2006 15:04:05 MST",
+ "Mon, 02 Jan 2006 15:04:05 GMT-0700",
+ "Mon,02 Jan 2006 15:04:05 -0700",
+ "Mon, 02 Jan 2006 15:04:05 -0700",
+ "Mon, 02 Jan 2006 15:04:05 -07:00",
+ "Mon, 02 Jan 2006 15:04:05 --0700",
+ "Mon 02 Jan 2006 15:04:05 -0700",
+ "Mon, 02 Jan 2006 15:04:05 -07",
+ "Mon, 02 Jan 2006 15:04:05 00",
+ "Mon, 02 Jan 2006 15:04:05",
+ "Mon, 02 Jan 2006",
+ "Mon, 02 Jan 06 15:04:05 MST",
+ "January 2, 2006 3:04 PM",
+ "January 2, 2006, 3:04 p.m.",
+ "January 2, 2006 15:04:05 MST",
+ "January 2, 2006 15:04:05",
+ "January 2, 2006 03:04 PM",
+ "January 2, 2006",
+ "January 02, 2006 15:04:05 MST",
+ "January 02, 2006 15:04",
+ "January 02, 2006 03:04 PM",
+ "January 02, 2006",
+ "Jan 2, 2006 3:04:05 PM MST",
+ "Jan 2, 2006 3:04:05 PM",
+ "Jan 2, 2006 15:04:05 MST",
+ "Jan 2, 2006",
+ "Jan 02 2006 03:04:05PM",
+ "Jan 02, 2006",
+ "6/1/2 15:04",
+ "6-1-2 15:04",
+ "2 January 2006 15:04:05 MST",
+ "2 January 2006 15:04:05 -0700",
+ "2 January 2006",
+ "2 Jan 2006 15:04:05 Z",
+ "2 Jan 2006 15:04:05 MST",
+ "2 Jan 2006 15:04:05 -0700",
+ "2 Jan 2006",
+ "2.1.2006 15:04:05",
+ "2/1/2006",
+ "2-1-2006",
+ "2006 January 02",
+ "2006-1-2T15:04:05Z",
+ "2006-1-2 15:04:05",
+ "2006-1-2",
+ "2006-1-02T15:04:05Z",
+ "2006-01-02T15:04Z",
+ "2006-01-02T15:04-07:00",
+ "2006-01-02T15:04:05Z",
+ "2006-01-02T15:04:05-07:00:00",
+ "2006-01-02T15:04:05:-0700",
+ "2006-01-02T15:04:05-0700",
+ "2006-01-02T15:04:05-07:00",
+ "2006-01-02T15:04:05 -0700",
+ "2006-01-02T15:04:05:00",
+ "2006-01-02T15:04:05",
+ "2006-01-02 at 15:04:05",
+ "2006-01-02 15:04:05Z",
+ "2006-01-02 15:04:05 MST",
+ "2006-01-02 15:04:05-0700",
+ "2006-01-02 15:04:05-07:00",
+ "2006-01-02 15:04:05 -0700",
+ "2006-01-02 15:04",
+ "2006-01-02 00:00:00.0 15:04:05.0 -0700",
+ "2006/01/02",
+ "2006-01-02",
+ "15:04 02.01.2006 -0700",
+ "1/2/2006 3:04 PM MST",
+ "1/2/2006 3:04:05 PM MST",
+ "1/2/2006 3:04:05 PM",
+ "1/2/2006 15:04:05 MST",
+ "1/2/2006",
+ "06/1/2 15:04",
+ "06-1-2 15:04",
+ "02 Monday, Jan 2006 15:04",
+ "02 Jan 2006 15:04 MST",
+ "02 Jan 2006 15:04:05 UT",
+ "02 Jan 2006 15:04:05 MST",
+ "02 Jan 2006 15:04:05 -0700",
+ "02 Jan 2006 15:04:05",
+ "02 Jan 2006",
+ "02/01/2006 15:04 MST",
+ "02-01-2006 15:04:05 MST",
+ "02.01.2006 15:04:05",
+ "02/01/2006 15:04:05",
+ "02.01.2006 15:04",
+ "02/01/2006 - 15:04",
+ "02.01.2006 -0700",
+ "02/01/2006",
+ "02-01-2006",
+ "01/02/2006 3:04 PM",
+ "01/02/2006 15:04:05 MST",
+ "01/02/2006 - 15:04",
+ "01/02/2006",
+ "01-02-2006",
+}
+
+// Parse converts a date string found in a feed into a time.Time.
+// Surrounding whitespace is ignored and every layout of dateFormats is
+// tried in order; the first one that matches wins. An error is returned
+// for an empty string or when no layout matches.
+func Parse(ds string) (t time.Time, err error) {
+	trimmed := strings.TrimSpace(ds)
+	if trimmed == "" {
+		return t, fmt.Errorf("Date string is empty")
+	}
+
+	for idx := 0; idx < len(dateFormats); idx++ {
+		parsed, parseErr := time.Parse(dateFormats[idx], trimmed)
+		if parseErr == nil {
+			return parsed, nil
+		}
+	}
+
+	return time.Time{}, fmt.Errorf("Failed to parse date: %s", ds)
+}
diff --git a/reader/feed/handler.go b/reader/feed/handler.go
new file mode 100644
index 0000000..27ff126
--- /dev/null
+++ b/reader/feed/handler.go
@@ -0,0 +1,152 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package feed
+
+import (
+ "fmt"
+ "github.com/miniflux/miniflux2/errors"
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/http"
+ "github.com/miniflux/miniflux2/reader/icon"
+ "github.com/miniflux/miniflux2/storage"
+ "log"
+ "time"
+)
+
+// Message templates handed to errors.NewLocalizedError by the handler; the
+// format verbs are filled with the arguments given at each call site.
+var (
+ errRequestFailed = "Unable to execute request: %v"
+ errServerFailure = "Unable to fetch feed (statusCode=%d)."
+ errDuplicate = "This feed already exists (%s)."
+ errNotFound = "Feed %d not found"
+)
+
+// Handler contains all the logic to create and refresh feeds.
+// Use NewFeedHandler to build one.
+type Handler struct {
+ // store is the storage layer used to read and persist feeds, entries and icons.
+ store *storage.Storage
+}
+
+// CreateFeed fetches, parses and stores a new feed for the given user and
+// category, then tries to attach an icon to it.
+//
+// It returns a localized error when the request fails, when the response
+// reports a server failure, or when the user already has a feed with the
+// same effective URL. Parsing and storage errors are returned as-is.
+// A failure to find an icon is only logged.
+func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, error) {
+ defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
+
+ client := http.NewHttpClient(url)
+ response, err := client.Get()
+ if err != nil {
+ return nil, errors.NewLocalizedError(errRequestFailed, err)
+ }
+
+ if response.HasServerFailure() {
+ return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
+ }
+
+ // The duplicate check uses the effective URL reported by the response,
+ // not the URL given by the caller.
+ if h.store.FeedURLExists(userID, response.EffectiveURL) {
+ return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
+ }
+
+ subscription, err := parseFeed(response.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ // The feed URL found in the document is replaced by the effective URL,
+ // and the cache headers are kept for later refreshes.
+ subscription.Category = &model.Category{ID: categoryID}
+ subscription.EtagHeader = response.ETag
+ subscription.LastModifiedHeader = response.LastModified
+ subscription.FeedURL = response.EffectiveURL
+ subscription.UserID = userID
+
+ err = h.store.CreateFeed(subscription)
+ if err != nil {
+ return nil, err
+ }
+
+ log.Println("[Handler:CreateFeed] Feed saved with ID:", subscription.ID)
+
+ // The icon lookup is best effort: the feed is returned even without an icon.
+ icon, err := icon.FindIcon(subscription.SiteURL)
+ if err != nil {
+ log.Println(err)
+ } else if icon == nil {
+ log.Printf("No icon found for feedID=%d\n", subscription.ID)
+ } else {
+ h.store.CreateFeedIcon(subscription, icon)
+ }
+
+ return subscription, nil
+}
+
+// RefreshFeed fetches a stored feed again and updates its entries when the
+// remote document has been modified.
+//
+// Request, server and parsing failures are recorded on the feed (error
+// counter and message) before being returned. On success the error counter
+// and message are reset. A missing feed yields a localized "not found"
+// error. The icon lookup is best effort and only logged.
+func (h *Handler) RefreshFeed(userID, feedID int64) error {
+	defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID))
+
+	originalFeed, err := h.store.GetFeedById(userID, feedID)
+	if err != nil {
+		return err
+	}
+
+	if originalFeed == nil {
+		return errors.NewLocalizedError(errNotFound, feedID)
+	}
+
+	// recordFailure stores the failure on the feed and hands it back to the
+	// caller. A failure to persist that state is logged instead of dropped.
+	recordFailure := func(failure error) error {
+		originalFeed.ParsingErrorCount++
+		originalFeed.ParsingErrorMsg = failure.Error()
+		if updateErr := h.store.UpdateFeed(originalFeed); updateErr != nil {
+			log.Println("[Handler:RefreshFeed]", updateErr)
+		}
+		return failure
+	}
+
+	client := http.NewHttpClientWithCacheHeaders(originalFeed.FeedURL, originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
+	response, err := client.Get()
+	if err != nil {
+		return recordFailure(errors.NewLocalizedError(errRequestFailed, err))
+	}
+
+	originalFeed.CheckedAt = time.Now()
+
+	if response.HasServerFailure() {
+		return recordFailure(errors.NewLocalizedError(errServerFailure, response.StatusCode))
+	}
+
+	if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
+		log.Printf("[Handler:RefreshFeed] Feed #%d has been modified\n", feedID)
+
+		subscription, err := parseFeed(response.Body)
+		if err != nil {
+			return recordFailure(err)
+		}
+
+		originalFeed.EtagHeader = response.ETag
+		originalFeed.LastModifiedHeader = response.LastModified
+
+		if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries); err != nil {
+			return err
+		}
+
+		if !h.store.HasIcon(originalFeed.ID) {
+			log.Println("[Handler:RefreshFeed] Looking for feed icon")
+			icon, err := icon.FindIcon(originalFeed.SiteURL)
+			if err != nil {
+				log.Println("[Handler:RefreshFeed]", err)
+			} else if icon == nil {
+				// Same guard as CreateFeed: never store a nil icon.
+				log.Printf("[Handler:RefreshFeed] No icon found for feedID=%d\n", originalFeed.ID)
+			} else {
+				h.store.CreateFeedIcon(originalFeed, icon)
+			}
+		}
+	} else {
+		log.Printf("[Handler:RefreshFeed] Feed #%d not modified\n", feedID)
+	}
+
+	originalFeed.ParsingErrorCount = 0
+	originalFeed.ParsingErrorMsg = ""
+
+	return h.store.UpdateFeed(originalFeed)
+}
+
+// NewFeedHandler builds a feed handler backed by the given storage.
+func NewFeedHandler(store *storage.Storage) *Handler {
+	handler := new(Handler)
+	handler.store = store
+	return handler
+}
diff --git a/reader/feed/json/json.go b/reader/feed/json/json.go
new file mode 100644
index 0000000..e7d53c4
--- /dev/null
+++ b/reader/feed/json/json.go
@@ -0,0 +1,170 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/feed/date"
+ "github.com/miniflux/miniflux2/reader/processor"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+ "log"
+ "strings"
+ "time"
+)
+
+// JsonFeed maps the top-level object of a JSON Feed document.
+// Transform converts it into the normalized model.Feed.
+type JsonFeed struct {
+ Version string `json:"version"`
+ Title string `json:"title"`
+ SiteURL string `json:"home_page_url"`
+ FeedURL string `json:"feed_url"`
+ Author JsonAuthor `json:"author"`
+ Items []JsonItem `json:"items"`
+}
+
+// JsonAuthor maps an "author" object; getAuthor only reads Name.
+type JsonAuthor struct {
+ Name string `json:"name"`
+ URL string `json:"url"`
+}
+
+// JsonItem maps one element of the "items" array.
+// The date fields hold raw strings that GetDate parses; Text and Html map
+// "content_text" and "content_html".
+type JsonItem struct {
+ ID string `json:"id"`
+ URL string `json:"url"`
+ Title string `json:"title"`
+ Summary string `json:"summary"`
+ Text string `json:"content_text"`
+ Html string `json:"content_html"`
+ DatePublished string `json:"date_published"`
+ DateModified string `json:"date_modified"`
+ Author JsonAuthor `json:"author"`
+ Attachments []JsonAttachment `json:"attachments"`
+}
+
+// JsonAttachment maps one element of an item's "attachments" array.
+// GetEnclosures copies URL, MimeType and Size into a model.Enclosure;
+// Title and Duration are decoded but not read by the code visible here.
+type JsonAttachment struct {
+ URL string `json:"url"`
+ MimeType string `json:"mime_type"`
+ Title string `json:"title"`
+ Size int `json:"size_in_bytes"`
+ Duration int `json:"duration_in_seconds"`
+}
+
+// GetAuthor returns the feed-level author as resolved by getAuthor.
+func (j *JsonFeed) GetAuthor() string {
+	feedAuthor := j.Author
+	return getAuthor(feedAuthor)
+}
+
+// Transform converts the decoded JSON Feed into a normalized feed.
+// The title falls back to the site URL when empty, and items without an
+// author inherit the feed-level author.
+func (j *JsonFeed) Transform() *model.Feed {
+	result := &model.Feed{}
+	result.FeedURL = j.FeedURL
+	result.SiteURL = j.SiteURL
+
+	if result.Title = sanitizer.StripTags(j.Title); result.Title == "" {
+		result.Title = result.SiteURL
+	}
+
+	for idx := range j.Items {
+		converted := j.Items[idx].Transform()
+		if converted.Author == "" {
+			converted.Author = j.GetAuthor()
+		}
+		result.Entries = append(result.Entries, converted)
+	}
+
+	return result
+}
+
+// GetDate returns the publication date of the item, or its modification
+// date when the publication date is missing or cannot be parsed.
+// Parse failures are logged. The current time is returned when neither
+// field yields a valid date.
+func (j *JsonItem) GetDate() time.Time {
+	for _, value := range []string{j.DatePublished, j.DateModified} {
+		if value == "" {
+			continue
+		}
+
+		d, err := date.Parse(value)
+		if err != nil {
+			// Keep going: the other field may still hold a usable date.
+			log.Println(err)
+			continue
+		}
+
+		return d
+	}
+
+	return time.Now()
+}
+
+// GetAuthor returns the item-level author as resolved by getAuthor.
+func (j *JsonItem) GetAuthor() string {
+	itemAuthor := j.Author
+	return getAuthor(itemAuthor)
+}
+
+// GetHash returns the hash of the first non-empty value among the item ID,
+// the item URL and the concatenation of text, HTML and summary.
+// It returns an empty string when all of them are empty.
+func (j *JsonItem) GetHash() string {
+	if j.ID != "" {
+		return helper.Hash(j.ID)
+	}
+
+	if j.URL != "" {
+		return helper.Hash(j.URL)
+	}
+
+	if body := j.Text + j.Html + j.Summary; body != "" {
+		return helper.Hash(body)
+	}
+
+	return ""
+}
+
+// GetTitle returns the truncated form of the first non-empty value among
+// title, summary, text content and HTML content. The item URL is returned,
+// untruncated, when all of them are empty.
+func (j *JsonItem) GetTitle() string {
+	switch {
+	case j.Title != "":
+		return truncate(j.Title)
+	case j.Summary != "":
+		return truncate(j.Summary)
+	case j.Text != "":
+		return truncate(j.Text)
+	case j.Html != "":
+		return truncate(j.Html)
+	}
+
+	return j.URL
+}
+
+// GetContent returns the HTML content of the item, falling back to the
+// text content and then to the summary. It returns an empty string when
+// all three are empty.
+func (j *JsonItem) GetContent() string {
+	switch {
+	case j.Html != "":
+		return j.Html
+	case j.Text != "":
+		return j.Text
+	default:
+		return j.Summary
+	}
+}
+
+// GetEnclosures builds one enclosure per attachment of the item, copying
+// its URL, MIME type and size. The returned list is never nil.
+func (j *JsonItem) GetEnclosures() model.EnclosureList {
+	list := model.EnclosureList{}
+
+	for idx := range j.Attachments {
+		current := j.Attachments[idx]
+		enclosure := &model.Enclosure{
+			URL:      current.URL,
+			MimeType: current.MimeType,
+			Size:     current.Size,
+		}
+		list = append(list, enclosure)
+	}
+
+	return list
+}
+
+// Transform converts the JSON Feed item into a normalized entry.
+// The title comes from GetTitle, trimmed and stripped of tags.
+func (j *JsonItem) Transform() *model.Entry {
+	link := j.URL
+
+	out := &model.Entry{URL: link}
+	out.Date = j.GetDate()
+	out.Author = sanitizer.StripTags(j.GetAuthor())
+	out.Hash = j.GetHash()
+	out.Content = processor.ItemContentProcessor(link, j.GetContent())
+
+	trimmedTitle := strings.Trim(j.GetTitle(), " \n\t")
+	out.Title = sanitizer.StripTags(trimmedTitle)
+	out.Enclosures = j.GetEnclosures()
+
+	return out
+}
+
+// getAuthor returns the author's name, which is an empty string when no
+// name is set.
+func getAuthor(author JsonAuthor) string {
+	return author.Name
+}
+
+func truncate(str string) string {
+ max := 100
+ if len(str) > max {
+ return str[:max] + "..."
+ }
+
+ return str
+}
diff --git a/reader/feed/json/parser.go b/reader/feed/json/parser.go
new file mode 100644
index 0000000..816d28e
--- /dev/null
+++ b/reader/feed/json/parser.go
@@ -0,0 +1,23 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/miniflux/miniflux2/model"
+ "io"
+)
+
+// Parse decodes a JSON Feed document from data and returns the normalized
+// feed. An error is returned when the JSON cannot be decoded into a JsonFeed.
+func Parse(data io.Reader) (*model.Feed, error) {
+	jsonFeed := new(JsonFeed)
+	decoder := json.NewDecoder(data)
+	// Decode into the struct itself, not into the pointer variable: with
+	// &jsonFeed a JSON "null" body would set the pointer to nil and make
+	// Transform panic.
+	if err := decoder.Decode(jsonFeed); err != nil {
+		return nil, fmt.Errorf("Unable to parse JSON Feed: %v", err)
+	}
+
+	return jsonFeed.Transform(), nil
+}
diff --git a/reader/feed/json/parser_test.go b/reader/feed/json/parser_test.go
new file mode 100644
index 0000000..f2c97fc
--- /dev/null
+++ b/reader/feed/json/parser_test.go
@@ -0,0 +1,345 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+ "time"
+)
+
+// TestParseJsonFeed parses a minimal two-item feed and checks the feed
+// attributes plus hash, URL, title and content of both entries.
+func TestParseJsonFeed(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2",
+ "content_text": "This is a second item.",
+ "url": "https://example.org/second-item"
+ },
+ {
+ "id": "1",
+ "content_html": "<p>Hello, world!</p>",
+ "url": "https://example.org/initial-post"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "My Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "https://example.org/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "https://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 2 {
+		// The checks below index both entries.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "https://example.org/second-item" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "This is a second item." {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != "This is a second item." {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+
+	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
+	}
+
+	if feed.Entries[1].URL != "https://example.org/initial-post" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
+	}
+
+	if feed.Entries[1].Title != "Hello, world!" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
+	}
+
+	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
+	}
+}
+
+// TestParsePodcast parses a podcast feed and checks the entry attributes,
+// the sanitized HTML content, the publication date and the enclosure built
+// from the attachment.
+func TestParsePodcast(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
+ "title": "The Record",
+ "home_page_url": "http://therecord.co/",
+ "feed_url": "http://therecord.co/feed.json",
+ "items": [
+ {
+ "id": "http://therecord.co/chris-parrish",
+ "title": "Special #1 - Chris Parrish",
+ "url": "http://therecord.co/chris-parrish",
+ "content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
+ "content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
+ "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
+ "date_published": "2014-05-09T14:04:00-07:00",
+ "attachments": [
+ {
+ "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
+ "mime_type": "audio/x-m4a",
+ "size_in_bytes": 89970236,
+ "duration_in_seconds": 6629
+ }
+ ]
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "The Record" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "http://therecord.co/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "http://therecord.co/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The checks below index the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
+		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
+	}
+
+	// A fixed -07:00 zone matches the offset in date_published and does not
+	// depend on the time zone database being installed.
+	location := time.FixedZone("PDT", -7*60*60)
+	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		// The checks below index the first enclosure.
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 89970236 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+}
+
+// TestParseAuthor checks that the feed-level author name is used as the
+// entry author.
+func TestParseAuthor(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
+ "title": "Brent Simmons’s Microblog",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "author": {
+ "name": "Brent Simmons",
+ "url": "http://example.org/",
+ "avatar": "https://example.org/avatar.png"
+ },
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "2016-02-09T14:22:00-07:00"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Brent Simmons" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseFeedWithoutTitle checks that the home page URL is used as the
+// feed title when the document has no title.
+func TestParseFeedWithoutTitle(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "2016-02-09T14:22:00-07:00"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "https://example.org/" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseFeedItemWithInvalidDate checks that an unparsable publication
+// date still yields an entry whose date lies before the current time.
+func TestParseFeedItemWithInvalidDate(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "Tomorrow"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if !feed.Entries[0].Date.Before(time.Now()) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+}
+
+// TestParseFeedItemWithoutID checks the hash computed for an item that has
+// neither an id nor a URL.
+func TestParseFeedItemWithoutID(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "content_text": "Some text."
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+}
+
+// TestParseFeedItemWithoutTitle checks that the item URL is used as the
+// entry title when the item has no title and no content.
+func TestParseFeedItemWithoutTitle(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "url": "https://example.org/item"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "https://example.org/item" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseTruncateItemTitle checks that a 200-character title is shortened
+// to 103 bytes.
+func TestParseTruncateItemTitle(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "title": "` + strings.Repeat("a", 200) + `"
+ }
+ ]
+ }`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Title) != 103 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
diff --git a/reader/feed/parser.go b/reader/feed/parser.go
new file mode 100644
index 0000000..8df6b46
--- /dev/null
+++ b/reader/feed/parser.go
@@ -0,0 +1,82 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package feed
+
+import (
+ "bytes"
+ "encoding/xml"
+ "errors"
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/feed/atom"
+ "github.com/miniflux/miniflux2/reader/feed/json"
+ "github.com/miniflux/miniflux2/reader/feed/rss"
+ "io"
+ "strings"
+ "time"
+
+ "golang.org/x/net/html/charset"
+)
+
+// Feed format identifiers returned by DetectFeedFormat.
+const (
+ // FormatRss is reported when the document contains an <rss> start element.
+ FormatRss = "rss"
+ // FormatAtom is reported when the document contains a <feed> start element.
+ FormatAtom = "atom"
+ // FormatJson is reported when the document starts with "{".
+ FormatJson = "json"
+ // FormatUnknown is reported when none of the other formats match.
+ FormatUnknown = "unknown"
+)
+
+// DetectFeedFormat inspects data and reports which feed format it holds.
+//
+// The input is tokenized as XML: the first start element named "rss" yields
+// FormatRss and the first one named "feed" yields FormatAtom. If tokenizing
+// ends without a match, the bytes consumed so far are checked for a leading
+// "{" (ignoring surrounding whitespace), which yields FormatJson. Anything
+// else is FormatUnknown.
+func DetectFeedFormat(data io.Reader) string {
+	defer helper.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
+
+	// Keep a copy of everything the XML decoder reads for the JSON check.
+	var consumed bytes.Buffer
+	decoder := xml.NewDecoder(io.TeeReader(data, &consumed))
+	decoder.CharsetReader = charset.NewReaderLabel
+
+	// Token returns a nil token at end of input or on error; both end the scan.
+	for token, _ := decoder.Token(); token != nil; token, _ = decoder.Token() {
+		start, isStart := token.(xml.StartElement)
+		if !isStart {
+			continue
+		}
+
+		if start.Name.Local == "rss" {
+			return FormatRss
+		}
+		if start.Name.Local == "feed" {
+			return FormatAtom
+		}
+	}
+
+	if strings.HasPrefix(strings.TrimSpace(consumed.String()), "{") {
+		return FormatJson
+	}
+
+	return FormatUnknown
+}
+
+// parseFeed reads data fully, detects its format with DetectFeedFormat and
+// hands it to the matching Atom, RSS or JSON parser.
+//
+// It returns an error when the input cannot be read, when the format is not
+// recognized, or when the selected parser fails.
+func parseFeed(data io.Reader) (*model.Feed, error) {
+	defer helper.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
+
+	// Buffer the whole input so it can be read twice: once for detection and
+	// once for parsing. A read failure must not be parsed as a complete feed.
+	var buffer bytes.Buffer
+	if _, err := io.Copy(&buffer, data); err != nil {
+		return nil, err
+	}
+
+	reader := bytes.NewReader(buffer.Bytes())
+	format := DetectFeedFormat(reader)
+
+	// Detection consumed part of the reader; rewind before parsing.
+	if _, err := reader.Seek(0, io.SeekStart); err != nil {
+		return nil, err
+	}
+
+	switch format {
+	case FormatAtom:
+		return atom.Parse(reader)
+	case FormatRss:
+		return rss.Parse(reader)
+	case FormatJson:
+		return json.Parse(reader)
+	default:
+		return nil, errors.New("Unsupported feed format")
+	}
+}
diff --git a/reader/feed/parser_test.go b/reader/feed/parser_test.go
new file mode 100644
index 0000000..0dd8dd6
--- /dev/null
+++ b/reader/feed/parser_test.go
@@ -0,0 +1,169 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package feed
+
+import (
+ "bytes"
+ "testing"
+)
+
+// TestDetectRSS ensures that an RSS 2.0 document is reported as FormatRss.
+func TestDetectRSS(t *testing.T) {
+	input := bytes.NewBufferString(`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`)
+
+	if got := DetectFeedFormat(input); got != FormatRss {
+		t.Errorf("Wrong format detected: %s instead of %s", got, FormatRss)
+	}
+}
+
+// TestDetectAtom ensures that an Atom document is reported as FormatAtom.
+func TestDetectAtom(t *testing.T) {
+	input := bytes.NewBufferString(`<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`)
+
+	if got := DetectFeedFormat(input); got != FormatAtom {
+		t.Errorf("Wrong format detected: %s instead of %s", got, FormatAtom)
+	}
+}
+
+// TestDetectAtomWithISOCharset ensures that an Atom document declaring the
+// ISO-8859-15 encoding is still reported as FormatAtom.
+func TestDetectAtomWithISOCharset(t *testing.T) {
+	input := bytes.NewBufferString(`<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`)
+
+	if got := DetectFeedFormat(input); got != FormatAtom {
+		t.Errorf("Wrong format detected: %s instead of %s", got, FormatAtom)
+	}
+}
+
+// TestDetectJSON ensures that a JSON document surrounded by whitespace is
+// reported as FormatJson.
+func TestDetectJSON(t *testing.T) {
+	data := `
+ {
+ "version" : "https://jsonfeed.org/version/1",
+ "title" : "Example"
+ }
+ `
+
+	if got := DetectFeedFormat(bytes.NewBufferString(data)); got != FormatJson {
+		t.Errorf("Wrong format detected: %s instead of %s", got, FormatJson)
+	}
+}
+
+// TestDetectUnknown ensures that an HTML document is reported as
+// FormatUnknown.
+func TestDetectUnknown(t *testing.T) {
+	data := `
+ <!DOCTYPE html> <html> </html>
+ `
+
+	if got := DetectFeedFormat(bytes.NewBufferString(data)); got != FormatUnknown {
+		t.Errorf("Wrong format detected: %s instead of %s", got, FormatUnknown)
+	}
+}
+
+// TestParseAtom checks that parseFeed handles an Atom document and exposes
+// its title.
+func TestParseAtom(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <author>
+ <name>John Doe</name>
+ </author>
+ <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+
+ <entry>
+ <title>Atom-Powered Robots Run Amok</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := parseFeed(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseRss checks that parseFeed handles an RSS document and exposes
+// its title.
+func TestParseRss(t *testing.T) {
+	data := `<?xml version="1.0"?>
+ <rss version="2.0">
+ <channel>
+ <title>Liftoff News</title>
+ <link>http://liftoff.msfc.nasa.gov/</link>
+ <item>
+ <title>Star City</title>
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
+ <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
+ <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
+ <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := parseFeed(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "Liftoff News" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseJson checks that parseFeed handles a JSON Feed document and
+// exposes its title.
+func TestParseJson(t *testing.T) {
+	data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2",
+ "content_text": "This is a second item.",
+ "url": "https://example.org/second-item"
+ },
+ {
+ "id": "1",
+ "content_html": "<p>Hello, world!</p>",
+ "url": "https://example.org/initial-post"
+ }
+ ]
+ }`
+
+	feed, err := parseFeed(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "My Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseUnknownFeed ensures that parseFeed rejects an XHTML document.
+func TestParseUnknownFeed(t *testing.T) {
+	data := `
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>Title of document</title>
+ </head>
+ <body>
+ some content
+ </body>
+ </html>
+ `
+
+	if _, err := parseFeed(bytes.NewBufferString(data)); err == nil {
+		t.Error("ParseFeed must returns an error")
+	}
+}
diff --git a/reader/feed/rss/parser.go b/reader/feed/rss/parser.go
new file mode 100644
index 0000000..52310e4
--- /dev/null
+++ b/reader/feed/rss/parser.go
@@ -0,0 +1,28 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rss
+
+import (
+ "encoding/xml"
+ "fmt"
+ "github.com/miniflux/miniflux2/model"
+ "io"
+
+ "golang.org/x/net/html/charset"
+)
+
+// Parse decodes an RSS document from data, using the charset reader for
+// non-UTF-8 encodings, and returns the normalized feed produced by its
+// Transform method.
+func Parse(data io.Reader) (*model.Feed, error) {
+	decoder := xml.NewDecoder(data)
+	decoder.CharsetReader = charset.NewReaderLabel
+
+	var rssFeed RssFeed
+	if err := decoder.Decode(&rssFeed); err != nil {
+		return nil, fmt.Errorf("Unable to parse RSS feed: %v", err)
+	}
+
+	return rssFeed.Transform(), nil
+}
diff --git a/reader/feed/rss/parser_test.go b/reader/feed/rss/parser_test.go
new file mode 100644
index 0000000..42dc965
--- /dev/null
+++ b/reader/feed/rss/parser_test.go
@@ -0,0 +1,466 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rss
+
+import (
+ "bytes"
+ "testing"
+ "time"
+)
+
+// TestParseRss2Sample parses a four-item RSS 2.0 sample and checks the feed
+// attributes plus date, hash, URL, title and content of the first entry.
+func TestParseRss2Sample(t *testing.T) {
+	data := `
+ <?xml version="1.0"?>
+ <rss version="2.0">
+ <channel>
+ <title>Liftoff News</title>
+ <link>http://liftoff.msfc.nasa.gov/</link>
+ <description>Liftoff to Space Exploration.</description>
+ <language>en-us</language>
+ <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
+ <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
+ <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+ <generator>Weblog Editor 2.0</generator>
+ <managingEditor>editor@example.com</managingEditor>
+ <webMaster>webmaster@example.com</webMaster>
+ <item>
+ <title>Star City</title>
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
+ <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
+ <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
+ <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
+ </item>
+ <item>
+ <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
+ <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
+ </item>
+ <item>
+ <title>The Engine That Does More</title>
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
+ <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
+ <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
+ </item>
+ <item>
+ <title>Astronauts' Dirty Laundry</title>
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
+ <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
+ <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "Liftoff News" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 4 {
+		// The checks below index the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
+	if !feed.Entries[0].Date.Equal(expectedDate) {
+		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
+	}
+
+	if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "Star City" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+}
+
+// TestParseFeedWithoutTitle checks that the channel link is used as the feed
+// title when the channel has no title.
+func TestParseFeedWithoutTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0">
+ <channel>
+ <link>https://example.org/</link>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "https://example.org/" {
+		t.Errorf("Incorrect feed title, got: %s", feed.Title)
+	}
+}
+
+// TestParseEntryWithoutTitle checks that the item link is used as the entry
+// title when the item has no title.
+func TestParseEntryWithoutTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0">
+ <channel>
+ <link>https://example.org/</link>
+ <item>
+ <link>https://example.org/item</link>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "https://example.org/item" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseFeedURLWithAtomLink checks that an <atom:link> provides the feed
+// URL while the plain <link> provides the site URL.
+func TestParseFeedURLWithAtomLink(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
+ <channel>
+ <title>Example</title>
+ <link>https://example.org/</link>
+ <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.FeedURL != "https://example.org/rss" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "https://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+}
+
+// TestParseEntryWithAtomAuthor checks that the <name> child of an item
+// <author> element is used as the entry author.
+func TestParseEntryWithAtomAuthor(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
+ <channel>
+ <title>Example</title>
+ <link>https://example.org/</link>
+ <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
+ <item>
+ <title>Test</title>
+ <link>https://example.org/item</link>
+ <author xmlns:author="http://www.w3.org/2005/Atom">
+ <name>Foo Bar</name>
+ <title>Vice President</title>
+ <department/>
+ <company>FooBar Inc.</company>
+ </author>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Foo Bar" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseEntryWithDublinCoreAuthor checks that <dc:creator> is used as the
+// entry author.
+func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <channel>
+ <title>Example</title>
+ <link>https://example.org/</link>
+ <item>
+ <title>Test</title>
+ <link>https://example.org/item</link>
+ <dc:creator>Me (me@example.com)</dc:creator>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Me (me@example.com)" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseEntryWithItunesAuthor checks that an item-level <itunes:author>
+// is used as the entry author.
+func TestParseEntryWithItunesAuthor(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
+ <channel>
+ <title>Example</title>
+ <link>https://example.org/</link>
+ <item>
+ <title>Test</title>
+ <link>https://example.org/item</link>
+ <itunes:author>Someone</itunes:author>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Someone" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseFeedWithItunesAuthor checks that a channel-level <itunes:author>
+// is used as the author of an entry that declares none.
+func TestParseFeedWithItunesAuthor(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
+ <channel>
+ <title>Example</title>
+ <link>https://example.org/</link>
+ <itunes:author>Someone</itunes:author>
+ <item>
+ <title>Test</title>
+ <link>https://example.org/item</link>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Someone" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseEntryWithDublinCoreDate checks that <dc:date> is used as the
+// entry date.
+func TestParseEntryWithDublinCoreDate(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <channel>
+ <title>Example</title>
+ <link>http://example.org/</link>
+ <item>
+ <title>Item 1</title>
+ <link>http://example.org/item1</link>
+ <description>Description.</description>
+ <guid isPermaLink="false">UUID</guid>
+ <dc:date>2002-09-29T23:40:06-05:00</dc:date>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	// A fixed -05:00 zone matches the offset in <dc:date> and does not depend
+	// on the time zone database being installed.
+	location := time.FixedZone("EST", -5*60*60)
+	expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
+	if !feed.Entries[0].Date.Equal(expectedDate) {
+		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
+	}
+}
+
+// TestParseEntryWithContentEncoded checks that <content:encoded> is used as
+// the entry content (after link attributes are rewritten) instead of the
+// description.
+func TestParseEntryWithContentEncoded(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+ <channel>
+ <title>Example</title>
+ <link>http://example.org/</link>
+ <item>
+ <title>Item 1</title>
+ <link>http://example.org/item1</link>
+ <description>Description.</description>
+ <guid isPermaLink="false">UUID</guid>
+ <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+}
+
+// TestParseEntryWithFeedBurnerLink checks that <feedburner:origLink> takes
+// precedence over <link> for the entry URL.
+func TestParseEntryWithFeedBurnerLink(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
+ <channel>
+ <title>Example</title>
+ <link>http://example.org/</link>
+ <item>
+ <title>Item 1</title>
+ <link>http://example.org/item1</link>
+ <feedburner:origLink>http://example.org/original</feedburner:origLink>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "http://example.org/original" {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
+	}
+}
+
+// TestParseEntryTitleWithWhitespaces checks that whitespace surrounding an
+// item title is removed.
+func TestParseEntryTitleWithWhitespaces(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0">
+ <channel>
+ <title>Example</title>
+ <link>http://example.org</link>
+ <item>
+ <title>
+ Some Title
+ </title>
+ <link>http://www.example.org/entries/1</link>
+ <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "Some Title" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryWithEnclosures checks that an <enclosure> element is turned
+// into an entry enclosure with its URL, MIME type and size.
+func TestParseEntryWithEnclosures(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0">
+ <channel>
+ <title>My Podcast Feed</title>
+ <link>http://example.org</link>
+ <author>some.email@example.org</author>
+ <item>
+ <title>Podcasting with RSS</title>
+ <link>http://www.example.org/entries/1</link>
+ <description>An overview of RSS podcasting</description>
+ <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
+ <guid isPermaLink="true">http://www.example.org/entries/1</guid>
+ <enclosure url="http://www.example.org/myaudiofile.mp3"
+ length="12345"
+ type="audio/mpeg" />
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The checks below index the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		// The checks below index the first enclosure.
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 12345 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+}
+
+// TestParseEntryWithFeedBurnerEnclosures checks that
+// <feedburner:origEnclosureLink> replaces the proxied enclosure URL while
+// the MIME type and size of the <enclosure> element are kept.
+func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
+ <channel>
+ <title>My Example Feed</title>
+ <link>http://example.org</link>
+ <author>some.email@example.org</author>
+ <item>
+ <title>Example Item</title>
+ <link>http://www.example.org/entries/1</link>
+ <enclosure
+ url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
+ length="76192460"
+ type="audio/mpeg" />
+ <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
+ </item>
+ </channel>
+ </rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on failure; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The checks below index the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		// The checks below index the first enclosure.
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 76192460 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+}
diff --git a/reader/feed/rss/rss.go b/reader/feed/rss/rss.go
new file mode 100644
index 0000000..d88ed6b
--- /dev/null
+++ b/reader/feed/rss/rss.go
@@ -0,0 +1,207 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rss
+
+import (
+ "encoding/xml"
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/feed/date"
+ "github.com/miniflux/miniflux2/reader/processor"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+ "log"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// RssLink represents a <link> element of the channel. The element name,
+// including its namespace, is kept in XMLName so that a plain RSS link (no
+// namespace) can be told apart from a namespaced one such as an Atom link.
+type RssLink struct {
+ XMLName xml.Name
+ Data string `xml:",chardata"` // text content of the element
+ Href string `xml:"href,attr"` // value of the href attribute
+}
+
+// RssFeed maps the root <rss> element of a feed. Apart from XMLName and
+// Version, every field is read from inside the <channel> element.
+type RssFeed struct {
+ XMLName xml.Name `xml:"rss"`
+ Version string `xml:"version,attr"`
+ Title string `xml:"channel>title"`
+ Links []RssLink `xml:"channel>link"` // GetSiteURL and GetFeedURL choose among these by namespace
+ Language string `xml:"channel>language"`
+ Description string `xml:"channel>description"`
+ PubDate string `xml:"channel>pubDate"`
+ ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"` // used by Transform as the author of entries that have none
+ Items []RssItem `xml:"channel>item"`
+}
+
+// RssItem maps one <item> element of the channel, including the elements
+// taken from the FeedBurner, content and Dublin Core namespaces listed in
+// the field tags.
+type RssItem struct {
+ Guid string `xml:"guid"`
+ Title string `xml:"title"`
+ Link string `xml:"link"`
+ OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"` // preferred over Link by GetURL
+ Description string `xml:"description"`
+ Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` // preferred over Description by GetContent
+ PubDate string `xml:"pubDate"`
+ Date string `xml:"http://purl.org/dc/elements/1.1/ date"` // preferred over PubDate by GetDate
+ Authors []RssAuthor `xml:"author"`
+ Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"` // fallback author used by GetAuthor
+ Enclosures []RssEnclosure `xml:"enclosure"`
+ OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"` // see GetEnclosures
+}
+
+// RssAuthor represents an <author> element of an item. The author may be
+// given either as the text of the element (Data) or in a nested <name>
+// element (Name).
+type RssAuthor struct {
+ XMLName xml.Name
+ Data string `xml:",chardata"` // text content of the element
+ Name string `xml:"name"` // content of a nested <name> element
+}
+
+// RssEnclosure holds the attributes of an <enclosure> element exactly as
+// they appear in the document.
+type RssEnclosure struct {
+ Url string `xml:"url,attr"`
+ Type string `xml:"type,attr"`
+ Length string `xml:"length,attr"` // kept as text; converted to an integer by GetEnclosures
+}
+
+// GetSiteURL returns the website URL of the feed: the text content of the
+// first channel link that does not belong to any XML namespace. An empty
+// string is returned when there is no such link.
+func (r *RssFeed) GetSiteURL() string {
+	for idx := range r.Links {
+		link := r.Links[idx]
+		if link.XMLName.Space != "" {
+			// Namespaced links are not the plain RSS <link>.
+			continue
+		}
+
+		return link.Data
+	}
+
+	return ""
+}
+
+func (r *RssFeed) GetFeedURL() string {
+ for _, elem := range r.Links {
+ if elem.XMLName.Space == "http://www.w3.org/2005/Atom" {
+ return elem.Href
+ }
+ }
+
+ return ""
+}
+
+// Transform converts the parsed RSS document into a model.Feed.
+//
+// The feed title is stripped of tags and falls back to the site URL when it
+// is empty. Every item is converted with RssItem.Transform; an entry without
+// an author receives the channel's iTunes author, and the resulting author is
+// stripped of tags.
+func (r *RssFeed) Transform() *model.Feed {
+	feed := &model.Feed{
+		SiteURL: r.GetSiteURL(),
+		FeedURL: r.GetFeedURL(),
+		Title:   sanitizer.StripTags(r.Title),
+	}
+	if feed.Title == "" {
+		feed.Title = feed.SiteURL
+	}
+
+	for idx := range r.Items {
+		entry := r.Items[idx].Transform()
+
+		// Assigning an empty iTunes author to an empty author is a no-op,
+		// so only the entry side needs to be checked.
+		if entry.Author == "" {
+			entry.Author = r.ItunesAuthor
+		}
+		entry.Author = sanitizer.StripTags(entry.Author)
+
+		feed.Entries = append(feed.Entries, entry)
+	}
+
+	return feed
+}
+// GetDate returns the publication date of the item. The Dublin Core date is
+// used when present, otherwise pubDate. When neither is set, or when the
+// value cannot be parsed (the error is logged), the current time is returned.
+func (i *RssItem) GetDate() time.Time {
+	raw := i.Date
+	if raw == "" {
+		raw = i.PubDate
+	}
+
+	if raw == "" {
+		return time.Now()
+	}
+
+	parsed, err := date.Parse(raw)
+	if err != nil {
+		log.Println(err)
+		return time.Now()
+	}
+
+	return parsed
+}
+
+// GetAuthor returns the author of the item. The <author> elements are
+// scanned in order and the first non-empty value wins, a nested name being
+// preferred over the text of the element. The Dublin Core creator is returned
+// when no author element provides a value.
+func (i *RssItem) GetAuthor() string {
+	for _, author := range i.Authors {
+		switch {
+		case author.Name != "":
+			return author.Name
+		case author.Data != "":
+			return author.Data
+		}
+	}
+
+	return i.Creator
+}
+
+// GetHash returns the hash of the item's guid, or of its link when the guid
+// is empty. An empty string is returned when both are empty.
+func (i *RssItem) GetHash() string {
+	key := i.Guid
+	if key == "" {
+		key = i.Link
+	}
+
+	if key == "" {
+		return ""
+	}
+
+	return helper.Hash(key)
+}
+
+// GetContent returns the encoded content of the item, falling back to its
+// description when no encoded content is available.
+func (i *RssItem) GetContent() string {
+	if i.Content == "" {
+		return i.Description
+	}
+
+	return i.Content
+}
+
+// GetURL returns the FeedBurner original link of the item, falling back to
+// the regular link when no original link is available.
+func (i *RssItem) GetURL() string {
+	if i.OriginalLink == "" {
+		return i.Link
+	}
+
+	return i.OriginalLink
+}
+
+func (i *RssItem) GetEnclosures() model.EnclosureList {
+ enclosures := make(model.EnclosureList, 0)
+
+ for _, enclosure := range i.Enclosures {
+ length, _ := strconv.Atoi(enclosure.Length)
+ enclosureURL := enclosure.Url
+
+ if i.OrigEnclosureLink != "" {
+ filename := path.Base(i.OrigEnclosureLink)
+ if strings.Contains(enclosureURL, filename) {
+ enclosureURL = i.OrigEnclosureLink
+ }
+ }
+
+ enclosures = append(enclosures, &model.Enclosure{
+ URL: enclosureURL,
+ MimeType: enclosure.Type,
+ Size: length,
+ })
+ }
+
+ return enclosures
+}
+
+// Transform converts the item into a model.Entry. The content goes through
+// the item content processor together with the entry URL, and the title is
+// trimmed of spaces, newlines and tabs and then stripped of tags. An entry
+// left without a title uses its URL as title.
+func (i *RssItem) Transform() *model.Entry {
+	entryURL := i.GetURL()
+
+	entry := &model.Entry{
+		URL:        entryURL,
+		Date:       i.GetDate(),
+		Author:     i.GetAuthor(),
+		Hash:       i.GetHash(),
+		Content:    processor.ItemContentProcessor(entryURL, i.GetContent()),
+		Title:      sanitizer.StripTags(strings.Trim(i.Title, " \n\t")),
+		Enclosures: i.GetEnclosures(),
+	}
+	if entry.Title == "" {
+		entry.Title = entryURL
+	}
+
+	return entry
+}