aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/feed
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
commitd5838b67340ca83dcc32f6d1775c183188ec5e7a (patch)
treea71705463458e5d2a6f74b0323119c3376f89b97 /reader/feed
parentc26787f47667f69a2d3e85db01fe20b9bc70bc9a (diff)
Move feed parsers packages in reader package
Diffstat (limited to 'reader/feed')
-rw-r--r--reader/feed/atom/atom.go193
-rw-r--r--reader/feed/atom/parser.go29
-rw-r--r--reader/feed/atom/parser_test.go333
-rw-r--r--reader/feed/date/parser.go203
-rw-r--r--reader/feed/json/json.go171
-rw-r--r--reader/feed/json/parser.go24
-rw-r--r--reader/feed/json/parser_test.go359
-rw-r--r--reader/feed/parser.go8
-rw-r--r--reader/feed/rdf/parser.go28
-rw-r--r--reader/feed/rdf/parser_test.go307
-rw-r--r--reader/feed/rdf/rdf.go71
-rw-r--r--reader/feed/rss/parser.go29
-rw-r--r--reader/feed/rss/parser_test.go550
-rw-r--r--reader/feed/rss/rss.go235
14 files changed, 4 insertions, 2536 deletions
diff --git a/reader/feed/atom/atom.go b/reader/feed/atom/atom.go
deleted file mode 100644
index 2035a4e..0000000
--- a/reader/feed/atom/atom.go
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package atom
-
-import (
- "encoding/xml"
- "log"
- "strconv"
- "strings"
- "time"
-
- "github.com/miniflux/miniflux2/helper"
- "github.com/miniflux/miniflux2/model"
- "github.com/miniflux/miniflux2/reader/feed/date"
- "github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
-)
-
-type atomFeed struct {
- XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
- ID string `xml:"id"`
- Title string `xml:"title"`
- Author atomAuthor `xml:"author"`
- Links []atomLink `xml:"link"`
- Entries []atomEntry `xml:"entry"`
-}
-
-type atomEntry struct {
- ID string `xml:"id"`
- Title string `xml:"title"`
- Updated string `xml:"updated"`
- Links []atomLink `xml:"link"`
- Summary string `xml:"summary"`
- Content atomContent `xml:"content"`
- MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
- Author atomAuthor `xml:"author"`
-}
-
-type atomAuthor struct {
- Name string `xml:"name"`
- Email string `xml:"email"`
-}
-
-type atomLink struct {
- URL string `xml:"href,attr"`
- Type string `xml:"type,attr"`
- Rel string `xml:"rel,attr"`
- Length string `xml:"length,attr"`
-}
-
-type atomContent struct {
- Type string `xml:"type,attr"`
- Data string `xml:",chardata"`
- XML string `xml:",innerxml"`
-}
-
-type atomMediaGroup struct {
- Description string `xml:"http://search.yahoo.com/mrss/ description"`
-}
-
-func (a *atomFeed) Transform() *model.Feed {
- feed := new(model.Feed)
- feed.FeedURL = getRelationURL(a.Links, "self")
- feed.SiteURL = getURL(a.Links)
- feed.Title = sanitizer.StripTags(a.Title)
-
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- for _, entry := range a.Entries {
- item := entry.Transform()
- if item.Author == "" {
- item.Author = getAuthor(a.Author)
- }
-
- feed.Entries = append(feed.Entries, item)
- }
-
- return feed
-}
-
-func (a *atomEntry) Transform() *model.Entry {
- entry := new(model.Entry)
- entry.URL = getURL(a.Links)
- entry.Date = getDate(a)
- entry.Author = sanitizer.StripTags(getAuthor(a.Author))
- entry.Hash = getHash(a)
- entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a))
- entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t"))
- entry.Enclosures = getEnclosures(a)
-
- if entry.Title == "" {
- entry.Title = entry.URL
- }
-
- return entry
-}
-
-func getURL(links []atomLink) string {
- for _, link := range links {
- if strings.ToLower(link.Rel) == "alternate" {
- return link.URL
- }
-
- if link.Rel == "" && link.Type == "" {
- return link.URL
- }
- }
-
- return ""
-}
-
-func getRelationURL(links []atomLink, relation string) string {
- for _, link := range links {
- if strings.ToLower(link.Rel) == relation {
- return link.URL
- }
- }
-
- return ""
-}
-
-func getDate(a *atomEntry) time.Time {
- if a.Updated != "" {
- result, err := date.Parse(a.Updated)
- if err != nil {
- log.Println(err)
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func getContent(a *atomEntry) string {
- if a.Content.Type == "html" || a.Content.Type == "text" {
- return a.Content.Data
- }
-
- if a.Content.Type == "xhtml" {
- return a.Content.XML
- }
-
- if a.Summary != "" {
- return a.Summary
- }
-
- if a.MediaGroup.Description != "" {
- return a.MediaGroup.Description
- }
-
- return ""
-}
-
-func getHash(a *atomEntry) string {
- for _, value := range []string{a.ID, getURL(a.Links)} {
- if value != "" {
- return helper.Hash(value)
- }
- }
-
- return ""
-}
-
-func getEnclosures(a *atomEntry) model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
-
- for _, link := range a.Links {
- if strings.ToLower(link.Rel) == "enclosure" {
- length, _ := strconv.Atoi(link.Length)
- enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
- }
- }
-
- return enclosures
-}
-
-func getAuthor(author atomAuthor) string {
- if author.Name != "" {
- return author.Name
- }
-
- if author.Email != "" {
- return author.Email
- }
-
- return ""
-}
diff --git a/reader/feed/atom/parser.go b/reader/feed/atom/parser.go
deleted file mode 100644
index 04bfd04..0000000
--- a/reader/feed/atom/parser.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package atom
-
-import (
- "encoding/xml"
- "io"
-
- "github.com/miniflux/miniflux2/errors"
- "github.com/miniflux/miniflux2/model"
-
- "golang.org/x/net/html/charset"
-)
-
-// Parse returns a normalized feed struct from a Atom feed.
-func Parse(data io.Reader) (*model.Feed, error) {
- atomFeed := new(atomFeed)
- decoder := xml.NewDecoder(data)
- decoder.CharsetReader = charset.NewReaderLabel
-
- err := decoder.Decode(atomFeed)
- if err != nil {
- return nil, errors.NewLocalizedError("Unable to parse Atom feed: %v.", err)
- }
-
- return atomFeed.Transform(), nil
-}
diff --git a/reader/feed/atom/parser_test.go b/reader/feed/atom/parser_test.go
deleted file mode 100644
index d5eea23..0000000
--- a/reader/feed/atom/parser_test.go
+++ /dev/null
@@ -1,333 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package atom
-
-import (
- "bytes"
- "testing"
- "time"
-
- "github.com/miniflux/miniflux2/errors"
-)
-
-func TestParseAtomSample(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
-
- <title>Example Feed</title>
- <link href="http://example.org/"/>
- <updated>2003-12-13T18:30:02Z</updated>
- <author>
- <name>John Doe</name>
- </author>
- <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
-
- <entry>
- <title>Atom-Powered Robots Run Amok</title>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- </entry>
-
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Title != "Example Feed" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
-
- if feed.FeedURL != "" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
-
- if feed.SiteURL != "http://example.org/" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
-
- if len(feed.Entries) != 1 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) {
- t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
- }
-
- if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" {
- t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
- }
-
- if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
-
- if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-
- if feed.Entries[0].Content != "Some text." {
- t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
- }
-
- if feed.Entries[0].Author != "John Doe" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseFeedWithoutTitle(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <link rel="alternate" type="text/html" href="https://example.org/"/>
- <link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
- <updated>2003-12-13T18:30:02Z</updated>
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Title != "https://example.org/" {
- t.Errorf("Incorrect feed title, got: %s", feed.Title)
- }
-}
-
-func TestParseEntryWithoutTitle(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
-
- <title>Example Feed</title>
- <link href="http://example.org/"/>
- <updated>2003-12-13T18:30:02Z</updated>
- <author>
- <name>John Doe</name>
- </author>
- <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
-
- <entry>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- </entry>
-
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-}
-
-func TestParseFeedURL(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link rel="alternate" type="text/html" href="https://example.org/"/>
- <link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
- <updated>2003-12-13T18:30:02Z</updated>
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.SiteURL != "https://example.org/" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
-
- if feed.FeedURL != "https://example.org/feed" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
-}
-
-func TestParseEntryTitleWithWhitespaces(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="http://example.org/"/>
-
- <entry>
- <title>
- Some Title
- </title>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- </entry>
-
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Title != "Some Title" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-}
-
-func TestParseEntryWithAuthorName(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="http://example.org/"/>
-
- <entry>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- <author>
- <name>Me</name>
- <email>me@localhost</email>
- </author>
- </entry>
-
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "Me" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseEntryWithoutAuthorName(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <title>Example Feed</title>
- <link href="http://example.org/"/>
-
- <entry>
- <link href="http://example.org/2003/12/13/atom03"/>
- <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
- <updated>2003-12-13T18:30:02Z</updated>
- <summary>Some text.</summary>
- <author>
- <name/>
- <email>me@localhost</email>
- </author>
- </entry>
-
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "me@localhost" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseEntryWithEnclosures(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <feed xmlns="http://www.w3.org/2005/Atom">
- <id>http://www.example.org/myfeed</id>
- <title>My Podcast Feed</title>
- <updated>2005-07-15T12:00:00Z</updated>
- <author>
- <name>John Doe</name>
- </author>
- <link href="http://example.org" />
- <link rel="self" href="http://example.org/myfeed" />
- <entry>
- <id>http://www.example.org/entries/1</id>
- <title>Atom 1.0</title>
- <updated>2005-07-15T12:00:00Z</updated>
- <link href="http://www.example.org/entries/1" />
- <summary>An overview of Atom 1.0</summary>
- <link rel="enclosure"
- type="audio/mpeg"
- title="MP3"
- href="http://www.example.org/myaudiofile.mp3"
- length="1234" />
- <link rel="enclosure"
- type="application/x-bittorrent"
- title="BitTorrent"
- href="http://www.example.org/myaudiofile.torrent"
- length="4567" />
- <content type="xhtml">
- <div xmlns="http://www.w3.org/1999/xhtml">
- <h1>Show Notes</h1>
- <ul>
- <li>00:01:00 -- Introduction</li>
- <li>00:15:00 -- Talking about Atom 1.0</li>
- <li>00:30:00 -- Wrapping up</li>
- </ul>
- </div>
- </content>
- </entry>
- </feed>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if len(feed.Entries) != 1 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- if feed.Entries[0].URL != "http://www.example.org/entries/1" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
-
- if len(feed.Entries[0].Enclosures) != 2 {
- t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
- }
-
- if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
- t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
- }
-
- if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
- t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
- }
-
- if feed.Entries[0].Enclosures[0].Size != 1234 {
- t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
- }
-
- if feed.Entries[0].Enclosures[1].URL != "http://www.example.org/myaudiofile.torrent" {
- t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[1].URL)
- }
-
- if feed.Entries[0].Enclosures[1].MimeType != "application/x-bittorrent" {
- t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[1].MimeType)
- }
-
- if feed.Entries[0].Enclosures[1].Size != 4567 {
- t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size)
- }
-}
-
-func TestParseInvalidXml(t *testing.T) {
- data := `garbage`
- _, err := Parse(bytes.NewBufferString(data))
- if err == nil {
- t.Error("Parse should returns an error")
- }
-
- if _, ok := err.(errors.LocalizedError); !ok {
- t.Error("The error returned must be a LocalizedError")
- }
-}
diff --git a/reader/feed/date/parser.go b/reader/feed/date/parser.go
deleted file mode 100644
index e573ff8..0000000
--- a/reader/feed/date/parser.go
+++ /dev/null
@@ -1,203 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package date
-
-import (
- "fmt"
- "strings"
- "time"
-)
-
-// DateFormats taken from github.com/mjibson/goread
-var dateFormats = []string{
- time.RFC822, // RSS
- time.RFC822Z, // RSS
- time.RFC3339, // Atom
- time.UnixDate,
- time.RubyDate,
- time.RFC850,
- time.RFC1123Z,
- time.RFC1123,
- time.ANSIC,
- "Mon, January 2 2006 15:04:05 -0700",
- "Mon, January 02, 2006, 15:04:05 MST",
- "Mon, January 02, 2006 15:04:05 MST",
- "Mon, Jan 2, 2006 15:04 MST",
- "Mon, Jan 2 2006 15:04 MST",
- "Mon, Jan 2, 2006 15:04:05 MST",
- "Mon, Jan 2 2006 15:04:05 -700",
- "Mon, Jan 2 2006 15:04:05 -0700",
- "Mon Jan 2 15:04 2006",
- "Mon Jan 2 15:04:05 2006 MST",
- "Mon Jan 02, 2006 3:04 pm",
- "Mon, Jan 02,2006 15:04:05 MST",
- "Mon Jan 02 2006 15:04:05 -0700",
- "Monday, January 2, 2006 15:04:05 MST",
- "Monday, January 2, 2006 03:04 PM",
- "Monday, January 2, 2006",
- "Monday, January 02, 2006",
- "Monday, 2 January 2006 15:04:05 MST",
- "Monday, 2 January 2006 15:04:05 -0700",
- "Monday, 2 Jan 2006 15:04:05 MST",
- "Monday, 2 Jan 2006 15:04:05 -0700",
- "Monday, 02 January 2006 15:04:05 MST",
- "Monday, 02 January 2006 15:04:05 -0700",
- "Monday, 02 January 2006 15:04:05",
- "Mon, 2 January 2006 15:04 MST",
- "Mon, 2 January 2006, 15:04 -0700",
- "Mon, 2 January 2006, 15:04:05 MST",
- "Mon, 2 January 2006 15:04:05 MST",
- "Mon, 2 January 2006 15:04:05 -0700",
- "Mon, 2 January 2006",
- "Mon, 2 Jan 2006 3:04:05 PM -0700",
- "Mon, 2 Jan 2006 15:4:5 MST",
- "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
- "Mon, 2, Jan 2006 15:4",
- "Mon, 2 Jan 2006 15:04 MST",
- "Mon, 2 Jan 2006, 15:04 -0700",
- "Mon, 2 Jan 2006 15:04 -0700",
- "Mon, 2 Jan 2006 15:04:05 UT",
- "Mon, 2 Jan 2006 15:04:05MST",
- "Mon, 2 Jan 2006 15:04:05 MST",
- "Mon 2 Jan 2006 15:04:05 MST",
- "mon,2 Jan 2006 15:04:05 MST",
- "Mon, 2 Jan 2006 15:04:05 -0700 MST",
- "Mon, 2 Jan 2006 15:04:05-0700",
- "Mon, 2 Jan 2006 15:04:05 -0700",
- "Mon, 2 Jan 2006 15:04:05",
- "Mon, 2 Jan 2006 15:04",
- "Mon,2 Jan 2006",
- "Mon, 2 Jan 2006",
- "Mon, 2 Jan 15:04:05 MST",
- "Mon, 2 Jan 06 15:04:05 MST",
- "Mon, 2 Jan 06 15:04:05 -0700",
- "Mon, 2006-01-02 15:04",
- "Mon,02 January 2006 14:04:05 MST",
- "Mon, 02 January 2006",
- "Mon, 02 Jan 2006 3:04:05 PM MST",
- "Mon, 02 Jan 2006 15 -0700",
- "Mon,02 Jan 2006 15:04 MST",
- "Mon, 02 Jan 2006 15:04 MST",
- "Mon, 02 Jan 2006 15:04 -0700",
- "Mon, 02 Jan 2006 15:04:05 Z",
- "Mon, 02 Jan 2006 15:04:05 UT",
- "Mon, 02 Jan 2006 15:04:05 MST-07:00",
- "Mon, 02 Jan 2006 15:04:05 MST -0700",
- "Mon, 02 Jan 2006, 15:04:05 MST",
- "Mon, 02 Jan 2006 15:04:05MST",
- "Mon, 02 Jan 2006 15:04:05 MST",
- "Mon , 02 Jan 2006 15:04:05 MST",
- "Mon, 02 Jan 2006 15:04:05 GMT-0700",
- "Mon,02 Jan 2006 15:04:05 -0700",
- "Mon, 02 Jan 2006 15:04:05 -0700",
- "Mon, 02 Jan 2006 15:04:05 -07:00",
- "Mon, 02 Jan 2006 15:04:05 --0700",
- "Mon 02 Jan 2006 15:04:05 -0700",
- "Mon, 02 Jan 2006 15:04:05 -07",
- "Mon, 02 Jan 2006 15:04:05 00",
- "Mon, 02 Jan 2006 15:04:05",
- "Mon, 02 Jan 2006",
- "Mon, 02 Jan 06 15:04:05 MST",
- "January 2, 2006 3:04 PM",
- "January 2, 2006, 3:04 p.m.",
- "January 2, 2006 15:04:05 MST",
- "January 2, 2006 15:04:05",
- "January 2, 2006 03:04 PM",
- "January 2, 2006",
- "January 02, 2006 15:04:05 MST",
- "January 02, 2006 15:04",
- "January 02, 2006 03:04 PM",
- "January 02, 2006",
- "Jan 2, 2006 3:04:05 PM MST",
- "Jan 2, 2006 3:04:05 PM",
- "Jan 2, 2006 15:04:05 MST",
- "Jan 2, 2006",
- "Jan 02 2006 03:04:05PM",
- "Jan 02, 2006",
- "6/1/2 15:04",
- "6-1-2 15:04",
- "2 January 2006 15:04:05 MST",
- "2 January 2006 15:04:05 -0700",
- "2 January 2006",
- "2 Jan 2006 15:04:05 Z",
- "2 Jan 2006 15:04:05 MST",
- "2 Jan 2006 15:04:05 -0700",
- "2 Jan 2006",
- "2.1.2006 15:04:05",
- "2/1/2006",
- "2-1-2006",
- "2006 January 02",
- "2006-1-2T15:04:05Z",
- "2006-1-2 15:04:05",
- "2006-1-2",
- "2006-1-02T15:04:05Z",
- "2006-01-02T15:04Z",
- "2006-01-02T15:04-07:00",
- "2006-01-02T15:04:05Z",
- "2006-01-02T15:04:05-07:00:00",
- "2006-01-02T15:04:05:-0700",
- "2006-01-02T15:04:05-0700",
- "2006-01-02T15:04:05-07:00",
- "2006-01-02T15:04:05 -0700",
- "2006-01-02T15:04:05:00",
- "2006-01-02T15:04:05",
- "2006-01-02 at 15:04:05",
- "2006-01-02 15:04:05Z",
- "2006-01-02 15:04:05 MST",
- "2006-01-02 15:04:05-0700",
- "2006-01-02 15:04:05-07:00",
- "2006-01-02 15:04:05 -0700",
- "2006-01-02 15:04",
- "2006-01-02 00:00:00.0 15:04:05.0 -0700",
- "2006/01/02",
- "2006-01-02",
- "15:04 02.01.2006 -0700",
- "1/2/2006 3:04 PM MST",
- "1/2/2006 3:04:05 PM MST",
- "1/2/2006 3:04:05 PM",
- "1/2/2006 15:04:05 MST",
- "1/2/2006",
- "06/1/2 15:04",
- "06-1-2 15:04",
- "02 Monday, Jan 2006 15:04",
- "02 Jan 2006 15:04 MST",
- "02 Jan 2006 15:04:05 UT",
- "02 Jan 2006 15:04:05 MST",
- "02 Jan 2006 15:04:05 -0700",
- "02 Jan 2006 15:04:05",
- "02 Jan 2006",
- "02/01/2006 15:04 MST",
- "02-01-2006 15:04:05 MST",
- "02.01.2006 15:04:05",
- "02/01/2006 15:04:05",
- "02.01.2006 15:04",
- "02/01/2006 - 15:04",
- "02.01.2006 -0700",
- "02/01/2006",
- "02-01-2006",
- "01/02/2006 3:04 PM",
- "01/02/2006 15:04:05 MST",
- "01/02/2006 - 15:04",
- "01/02/2006",
- "01-02-2006",
-}
-
-// Parse parses a given date string using a large
-// list of commonly found feed date formats.
-func Parse(ds string) (t time.Time, err error) {
- d := strings.TrimSpace(ds)
- if d == "" {
- return t, fmt.Errorf("Date string is empty")
- }
-
- for _, f := range dateFormats {
- if t, err = time.Parse(f, d); err == nil {
- return
- }
- }
-
- err = fmt.Errorf("Failed to parse date: %s", ds)
- return
-}
diff --git a/reader/feed/json/json.go b/reader/feed/json/json.go
deleted file mode 100644
index e4473f4..0000000
--- a/reader/feed/json/json.go
+++ /dev/null
@@ -1,171 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package json
-
-import (
- "log"
- "strings"
- "time"
-
- "github.com/miniflux/miniflux2/helper"
- "github.com/miniflux/miniflux2/model"
- "github.com/miniflux/miniflux2/reader/feed/date"
- "github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
-)
-
-type jsonFeed struct {
- Version string `json:"version"`
- Title string `json:"title"`
- SiteURL string `json:"home_page_url"`
- FeedURL string `json:"feed_url"`
- Author jsonAuthor `json:"author"`
- Items []jsonItem `json:"items"`
-}
-
-type jsonAuthor struct {
- Name string `json:"name"`
- URL string `json:"url"`
-}
-
-type jsonItem struct {
- ID string `json:"id"`
- URL string `json:"url"`
- Title string `json:"title"`
- Summary string `json:"summary"`
- Text string `json:"content_text"`
- HTML string `json:"content_html"`
- DatePublished string `json:"date_published"`
- DateModified string `json:"date_modified"`
- Author jsonAuthor `json:"author"`
- Attachments []jsonAttachment `json:"attachments"`
-}
-
-type jsonAttachment struct {
- URL string `json:"url"`
- MimeType string `json:"mime_type"`
- Title string `json:"title"`
- Size int `json:"size_in_bytes"`
- Duration int `json:"duration_in_seconds"`
-}
-
-func (j *jsonFeed) GetAuthor() string {
- return getAuthor(j.Author)
-}
-
-func (j *jsonFeed) Transform() *model.Feed {
- feed := new(model.Feed)
- feed.FeedURL = j.FeedURL
- feed.SiteURL = j.SiteURL
- feed.Title = sanitizer.StripTags(j.Title)
-
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- for _, item := range j.Items {
- entry := item.Transform()
- if entry.Author == "" {
- entry.Author = j.GetAuthor()
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
-}
-
-func (j *jsonItem) GetDate() time.Time {
- for _, value := range []string{j.DatePublished, j.DateModified} {
- if value != "" {
- d, err := date.Parse(value)
- if err != nil {
- log.Println(err)
- return time.Now()
- }
-
- return d
- }
- }
-
- return time.Now()
-}
-
-func (j *jsonItem) GetAuthor() string {
- return getAuthor(j.Author)
-}
-
-func (j *jsonItem) GetHash() string {
- for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} {
- if value != "" {
- return helper.Hash(value)
- }
- }
-
- return ""
-}
-
-func (j *jsonItem) GetTitle() string {
- for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} {
- if value != "" {
- return truncate(value)
- }
- }
-
- return j.URL
-}
-
-func (j *jsonItem) GetContent() string {
- for _, value := range []string{j.HTML, j.Text, j.Summary} {
- if value != "" {
- return value
- }
- }
-
- return ""
-}
-
-func (j *jsonItem) GetEnclosures() model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
-
- for _, attachment := range j.Attachments {
- enclosures = append(enclosures, &model.Enclosure{
- URL: attachment.URL,
- MimeType: attachment.MimeType,
- Size: attachment.Size,
- })
- }
-
- return enclosures
-}
-
-func (j *jsonItem) Transform() *model.Entry {
- entry := new(model.Entry)
- entry.URL = j.URL
- entry.Date = j.GetDate()
- entry.Author = sanitizer.StripTags(j.GetAuthor())
- entry.Hash = j.GetHash()
- entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent())
- entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t"))
- entry.Enclosures = j.GetEnclosures()
- return entry
-}
-
-func getAuthor(author jsonAuthor) string {
- if author.Name != "" {
- return author.Name
- }
-
- return ""
-}
-
-func truncate(str string) string {
- max := 100
- if len(str) > max {
- return str[:max] + "..."
- }
-
- return str
-}
diff --git a/reader/feed/json/parser.go b/reader/feed/json/parser.go
deleted file mode 100644
index 18329e7..0000000
--- a/reader/feed/json/parser.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package json
-
-import (
- "encoding/json"
- "io"
-
- "github.com/miniflux/miniflux2/errors"
- "github.com/miniflux/miniflux2/model"
-)
-
-// Parse returns a normalized feed struct from a JON feed.
-func Parse(data io.Reader) (*model.Feed, error) {
- feed := new(jsonFeed)
- decoder := json.NewDecoder(data)
- if err := decoder.Decode(&feed); err != nil {
- return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err)
- }
-
- return feed.Transform(), nil
-}
diff --git a/reader/feed/json/parser_test.go b/reader/feed/json/parser_test.go
deleted file mode 100644
index ecb11a1..0000000
--- a/reader/feed/json/parser_test.go
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package json
-
-import (
- "bytes"
- "strings"
- "testing"
- "time"
-
- "github.com/miniflux/miniflux2/errors"
-)
-
-// TestParseJsonFeed checks that a minimal JSON Feed document is converted
-// into a feed with the expected metadata and two entries.
-func TestParseJsonFeed(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "id": "2",
- "content_text": "This is a second item.",
- "url": "https://example.org/second-item"
- },
- {
- "id": "1",
- "content_html": "<p>Hello, world!</p>",
- "url": "https://example.org/initial-post"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if feed.Title != "My Example Feed" {
-		t.Errorf("Incorrect title, got: %s", feed.Title)
-	}
-
-	if feed.FeedURL != "https://example.org/feed.json" {
-		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
-	}
-
-	if feed.SiteURL != "https://example.org/" {
-		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
-	}
-
-	if len(feed.Entries) != 2 {
-		// Stop here: the assertions below index into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
-	}
-
-	if feed.Entries[0].URL != "https://example.org/second-item" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
-	}
-
-	if feed.Entries[0].Title != "This is a second item." {
-		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
-	}
-
-	if feed.Entries[0].Content != "This is a second item." {
-		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
-	}
-
-	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
-	}
-
-	if feed.Entries[1].URL != "https://example.org/initial-post" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
-	}
-
-	if feed.Entries[1].Title != "Hello, world!" {
-		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
-	}
-
-	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
-		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
-	}
-}
-
-// TestParsePodcast checks a podcast-style JSON feed: feed metadata, entry
-// fields, the published date and the attachment exposed as an enclosure.
-func TestParsePodcast(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
- "title": "The Record",
- "home_page_url": "http://therecord.co/",
- "feed_url": "http://therecord.co/feed.json",
- "items": [
- {
- "id": "http://therecord.co/chris-parrish",
- "title": "Special #1 - Chris Parrish",
- "url": "http://therecord.co/chris-parrish",
- "content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
- "content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
- "summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
- "date_published": "2014-05-09T14:04:00-07:00",
- "attachments": [
- {
- "url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
- "mime_type": "audio/x-m4a",
- "size_in_bytes": 89970236,
- "duration_in_seconds": 6629
- }
- ]
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if feed.Title != "The Record" {
-		t.Errorf("Incorrect title, got: %s", feed.Title)
-	}
-
-	if feed.FeedURL != "http://therecord.co/feed.json" {
-		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
-	}
-
-	if feed.SiteURL != "http://therecord.co/" {
-		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertions below index into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
-	}
-
-	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
-	}
-
-	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
-		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
-	}
-
-	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
-		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
-	}
-
-	// A missing time zone database must fail the test instead of handing a
-	// nil location to time.Date.
-	location, err := time.LoadLocation("America/Vancouver")
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
-		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
-	}
-
-	if len(feed.Entries[0].Enclosures) != 1 {
-		// Stop here: the assertions below index into the enclosure list.
-		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
-	}
-
-	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
-		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
-	}
-
-	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
-		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
-	}
-
-	if feed.Entries[0].Enclosures[0].Size != 89970236 {
-		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
-	}
-}
-
-// TestParseAuthor checks that the feed-level author name is used for an
-// entry that does not declare its own author.
-func TestParseAuthor(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
- "title": "Brent Simmons’s Microblog",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "author": {
- "name": "Brent Simmons",
- "url": "http://example.org/",
- "avatar": "https://example.org/avatar.png"
- },
- "items": [
- {
- "id": "2347259",
- "url": "https://example.org/2347259",
- "content_text": "Cats are neat. \n\nhttps://example.org/cats",
- "date_published": "2016-02-09T14:22:00-07:00"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Author != "Brent Simmons" {
-		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
-	}
-}
-
-// TestParseFeedWithoutTitle checks that a feed without a title gets the
-// home page URL as its title.
-func TestParseFeedWithoutTitle(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "id": "2347259",
- "url": "https://example.org/2347259",
- "content_text": "Cats are neat. \n\nhttps://example.org/cats",
- "date_published": "2016-02-09T14:22:00-07:00"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if feed.Title != "https://example.org/" {
-		t.Errorf("Incorrect title, got: %s", feed.Title)
-	}
-}
-
-// TestParseFeedItemWithInvalidDate checks that an unparsable publication
-// date does not fail the parsing and yields an entry date in the past.
-func TestParseFeedItemWithInvalidDate(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "id": "2347259",
- "url": "https://example.org/2347259",
- "content_text": "Cats are neat. \n\nhttps://example.org/cats",
- "date_published": "Tomorrow"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if !feed.Entries[0].Date.Before(time.Now()) {
-		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
-	}
-}
-
-// TestParseFeedItemWithoutID checks the hash computed for an item that has
-// neither an ID nor a URL.
-func TestParseFeedItemWithoutID(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "content_text": "Some text."
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
-	}
-}
-
-// TestParseFeedItemWithoutTitle checks that an item without a title gets
-// its URL as entry title.
-func TestParseFeedItemWithoutTitle(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "url": "https://example.org/item"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Title != "https://example.org/item" {
-		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
-	}
-}
-
-// TestParseTruncateItemTitle checks that a 200-character item title is cut
-// down to 103 bytes in the resulting entry.
-func TestParseTruncateItemTitle(t *testing.T) {
-	data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "title": "` + strings.Repeat("a", 200) + `"
- }
- ]
- }`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if len(feed.Entries[0].Title) != 103 {
-		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
-	}
-}
-
-// TestParseInvalidJSON checks that input which is not JSON makes Parse
-// return an error of type LocalizedError.
-func TestParseInvalidJSON(t *testing.T) {
-	_, parseErr := Parse(bytes.NewBufferString(`garbage`))
-	if parseErr == nil {
-		t.Error("Parse should returns an error")
-	}
-
-	_, isLocalized := parseErr.(errors.LocalizedError)
-	if !isLocalized {
-		t.Error("The error returned must be a LocalizedError")
-	}
-}
diff --git a/reader/feed/parser.go b/reader/feed/parser.go
index d94d72b..a28cf2d 100644
--- a/reader/feed/parser.go
+++ b/reader/feed/parser.go
@@ -14,10 +14,10 @@ import (
"github.com/miniflux/miniflux2/helper"
"github.com/miniflux/miniflux2/model"
- "github.com/miniflux/miniflux2/reader/feed/atom"
- "github.com/miniflux/miniflux2/reader/feed/json"
- "github.com/miniflux/miniflux2/reader/feed/rdf"
- "github.com/miniflux/miniflux2/reader/feed/rss"
+ "github.com/miniflux/miniflux2/reader/atom"
+ "github.com/miniflux/miniflux2/reader/json"
+ "github.com/miniflux/miniflux2/reader/rdf"
+ "github.com/miniflux/miniflux2/reader/rss"
"golang.org/x/net/html/charset"
)
diff --git a/reader/feed/rdf/parser.go b/reader/feed/rdf/parser.go
deleted file mode 100644
index f854a97..0000000
--- a/reader/feed/rdf/parser.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rdf
-
-import (
- "encoding/xml"
- "io"
-
- "github.com/miniflux/miniflux2/errors"
- "github.com/miniflux/miniflux2/model"
- "golang.org/x/net/html/charset"
-)
-
-// Parse reads an RDF document from data and returns the normalized feed.
-// The XML decoder is given a charset reader so that documents declaring a
-// non-UTF-8 encoding can be read. A decoding failure is reported as a
-// localized error and no feed is returned.
-func Parse(data io.Reader) (*model.Feed, error) {
-	var parsed rdfFeed
-
-	xmlDecoder := xml.NewDecoder(data)
-	xmlDecoder.CharsetReader = charset.NewReaderLabel
-
-	if err := xmlDecoder.Decode(&parsed); err != nil {
-		return nil, errors.NewLocalizedError("Unable to parse RDF feed: %v.", err)
-	}
-
-	return parsed.Transform(), nil
-}
diff --git a/reader/feed/rdf/parser_test.go b/reader/feed/rdf/parser_test.go
deleted file mode 100644
index dadca6f..0000000
--- a/reader/feed/rdf/parser_test.go
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rdf
-
-import (
- "bytes"
- "strings"
- "testing"
-
- "github.com/miniflux/miniflux2/errors"
-)
-
-// TestParseRDFSample parses an RSS 1.0 sample document and checks the feed
-// metadata and the fields of the second entry.
-func TestParseRDFSample(t *testing.T) {
-	data := `
- <?xml version="1.0"?>
-
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns="http://purl.org/rss/1.0/"
- >
-
- <channel rdf:about="http://www.xml.com/xml/news.rss">
- <title>XML.com</title>
- <link>http://xml.com/pub</link>
- <description>
- XML.com features a rich mix of information and services
- for the XML community.
- </description>
-
- <image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
-
- <items>
- <rdf:Seq>
- <rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
- <rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
- </rdf:Seq>
- </items>
-
- <textinput rdf:resource="http://search.xml.com" />
-
- </channel>
-
- <image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
- <title>XML.com</title>
- <link>http://www.xml.com</link>
- <url>http://xml.com/universal/images/xml_tiny.gif</url>
- </image>
-
- <item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
- <title>Processing Inclusions with XSLT</title>
- <link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
- <description>
- Processing document inclusions with general XML tools can be
- problematic. This article proposes a way of preserving inclusion
- information through SAX-based processing.
- </description>
- </item>
-
- <item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
- <title>Putting RDF to Work</title>
- <link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
- <description>
- Tool and API support for the Resource Description Framework
- is slowly coming of age. Edd Dumbill takes a look at RDFDB,
- one of the most exciting new RDF toolkits.
- </description>
- </item>
-
- <textinput rdf:about="http://search.xml.com">
- <title>Search XML.com</title>
- <description>Search XML.com's XML collection</description>
- <name>s</name>
- <link>http://search.xml.com</link>
- </textinput>
-
- </rdf:RDF>`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if feed.Title != "XML.com" {
-		t.Errorf("Incorrect title, got: %s", feed.Title)
-	}
-
-	if feed.FeedURL != "" {
-		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
-	}
-
-	if feed.SiteURL != "http://xml.com/pub" {
-		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
-	}
-
-	if len(feed.Entries) != 2 {
-		// Stop here: the assertions below index into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	// The failure messages report the entry under test (index 1).
-	if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
-	}
-
-	if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
-	}
-
-	if feed.Entries[1].Title != "Putting RDF to Work" {
-		t.Errorf("Incorrect entry title, got: %s", feed.Entries[1].Title)
-	}
-
-	// The description is surrounded by whitespace in the document, so look
-	// for its opening words anywhere in the content rather than at an edge.
-	if !strings.Contains(feed.Entries[1].Content, "Tool and API support") {
-		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
-	}
-}
-
-// TestParseRDFSampleWithDublinCore parses an RSS 1.0 document using Dublin
-// Core elements and checks the feed metadata and the single entry,
-// including the author taken from dc:creator.
-func TestParseRDFSampleWithDublinCore(t *testing.T) {
-	data := `<?xml version="1.0" encoding="utf-8"?>
-
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
- xmlns:co="http://purl.org/rss/1.0/modules/company/"
- xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
- xmlns="http://purl.org/rss/1.0/"
- >
-
- <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
- <title>Meerkat</title>
- <link>http://meerkat.oreillynet.com</link>
- <description>Meerkat: An Open Wire Service</description>
- <dc:publisher>The O'Reilly Network</dc:publisher>
- <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
- <dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
- <dc:date>2000-01-01T12:00+00:00</dc:date>
- <sy:updatePeriod>hourly</sy:updatePeriod>
- <sy:updateFrequency>2</sy:updateFrequency>
- <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
-
- <image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
-
- <items>
- <rdf:Seq>
- <rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
- </rdf:Seq>
- </items>
-
- <textinput rdf:resource="http://meerkat.oreillynet.com" />
-
- </channel>
-
- <image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
- <title>Meerkat Powered!</title>
- <url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
- <link>http://meerkat.oreillynet.com</link>
- </image>
-
- <item rdf:about="http://c.moreover.com/click/here.pl?r123">
- <title>XML: A Disruptive Technology</title>
- <link>http://c.moreover.com/click/here.pl?r123</link>
- <dc:description>
- XML is placing increasingly heavy loads on the existing technical
- infrastructure of the Internet.
- </dc:description>
- <dc:publisher>The O'Reilly Network</dc:publisher>
- <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
- <dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
- <dc:subject>XML</dc:subject>
- <co:name>XML.com</co:name>
- <co:market>NASDAQ</co:market>
- <co:symbol>XML</co:symbol>
- </item>
-
- <textinput rdf:about="http://meerkat.oreillynet.com">
- <title>Search Meerkat</title>
- <description>Search Meerkat's RSS Database...</description>
- <name>s</name>
- <link>http://meerkat.oreillynet.com/</link>
- <ti:function>search</ti:function>
- <ti:inputType>regex</ti:inputType>
- </textinput>
-
- </rdf:RDF>`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if feed.Title != "Meerkat" {
-		t.Errorf("Incorrect title, got: %s", feed.Title)
-	}
-
-	if feed.FeedURL != "" {
-		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
-	}
-
-	if feed.SiteURL != "http://meerkat.oreillynet.com" {
-		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertions below index into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
-	}
-
-	if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
-	}
-
-	if feed.Entries[0].Title != "XML: A Disruptive Technology" {
-		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
-	}
-
-	// The description is surrounded by whitespace in the document, so look
-	// for its opening words anywhere in the content rather than at an edge.
-	if !strings.Contains(feed.Entries[0].Content, "XML is placing increasingly") {
-		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
-	}
-
-	if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
-		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
-	}
-}
-
-// TestParseItemWithOnlyFeedAuthor checks that an item without dc:creator
-// inherits the creator declared on the channel.
-func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
-	data := `<?xml version="1.0" encoding="utf-8"?>
-
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns="http://purl.org/rss/1.0/"
- >
-
- <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
- <title>Meerkat</title>
- <link>http://meerkat.oreillynet.com</link>
- <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
- </channel>
-
- <item rdf:about="http://c.moreover.com/click/here.pl?r123">
- <title>XML: A Disruptive Technology</title>
- <link>http://c.moreover.com/click/here.pl?r123</link>
- <dc:description>
- XML is placing increasingly heavy loads on the existing technical
- infrastructure of the Internet.
- </dc:description>
- </item>
- </rdf:RDF>`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertion below indexes into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
-		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
-	}
-}
-
-// TestParseItemWithoutLink checks that an item without a link is hashed
-// from its other fields and gets the channel link as its URL.
-func TestParseItemWithoutLink(t *testing.T) {
-	data := `<?xml version="1.0" encoding="utf-8"?>
-
- <rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns="http://purl.org/rss/1.0/"
- >
-
- <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
- <title>Meerkat</title>
- <link>http://meerkat.oreillynet.com</link>
- </channel>
-
- <item rdf:about="http://c.moreover.com/click/here.pl?r123">
- <title>Title</title>
- <description>Test</description>
- </item>
- </rdf:RDF>`
-
-	feed, err := Parse(bytes.NewBufferString(data))
-	if err != nil {
-		// Parse returns a nil feed on failure, so nothing below can be checked.
-		t.Fatal(err)
-	}
-
-	if len(feed.Entries) != 1 {
-		// Stop here: the assertions below index into feed.Entries.
-		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
-		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
-	}
-
-	if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
-		t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
-	}
-}
-
-// TestParseInvalidXml checks that input which is not XML makes Parse
-// return an error of type LocalizedError.
-func TestParseInvalidXml(t *testing.T) {
-	_, parseErr := Parse(bytes.NewBufferString(`garbage`))
-	if parseErr == nil {
-		t.Error("Parse should returns an error")
-	}
-
-	_, isLocalized := parseErr.(errors.LocalizedError)
-	if !isLocalized {
-		t.Error("The error returned must be a LocalizedError")
-	}
-}
diff --git a/reader/feed/rdf/rdf.go b/reader/feed/rdf/rdf.go
deleted file mode 100644
index 05281ca..0000000
--- a/reader/feed/rdf/rdf.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rdf
-
-import (
- "encoding/xml"
-
- "github.com/miniflux/miniflux2/helper"
- "github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
-
- "github.com/miniflux/miniflux2/model"
-)
-
-// rdfFeed is the XML decoding target for an RDF document: the root element
-// is named RDF, the title, link and creator are read from the channel
-// element, and every item element becomes one rdfItem.
-type rdfFeed struct {
- XMLName xml.Name `xml:"RDF"`
- Title string `xml:"channel>title"`
- Link string `xml:"channel>link"`
- Creator string `xml:"channel>creator"`
- Items []rdfItem `xml:"item"`
-}
-
-// Transform builds the normalized feed: the title is stripped of tags, the
-// channel link becomes the site URL, and every item is converted into an
-// entry. An entry without author takes the channel creator when there is
-// one, and an entry without URL takes the site URL.
-func (r *rdfFeed) Transform() *model.Feed {
-	result := &model.Feed{
-		Title:   sanitizer.StripTags(r.Title),
-		SiteURL: r.Link,
-	}
-
-	for i := range r.Items {
-		converted := r.Items[i].Transform()
-
-		if r.Creator != "" && converted.Author == "" {
-			converted.Author = sanitizer.StripTags(r.Creator)
-		}
-
-		if converted.URL == "" {
-			converted.URL = result.SiteURL
-		}
-
-		result.Entries = append(result.Entries, converted)
-	}
-
-	return result
-}
-
-// rdfItem is the XML decoding target for one item element; it keeps the
-// title, link, description and creator child elements.
-type rdfItem struct {
- Title string `xml:"title"`
- Link string `xml:"link"`
- Description string `xml:"description"`
- Creator string `xml:"creator"`
-}
-
-// Transform converts the RDF item into a normalized model entry. Title and
-// author are stripped of tags, the link is used as URL, the description goes
-// through the item content processor, and the hash comes from getHash.
-func (r *rdfItem) Transform() *model.Entry {
-	return &model.Entry{
-		Title:   sanitizer.StripTags(r.Title),
-		Author:  sanitizer.StripTags(r.Creator),
-		URL:     r.Link,
-		Content: processor.ItemContentProcessor(r.Link, r.Description),
-		Hash:    getHash(r),
-	}
-}
-
-// getHash returns the hash identifying an RDF item: the hash of its link
-// or, when the link is empty, of its title followed by its description.
-func getHash(r *rdfItem) string {
-	if r.Link != "" {
-		return helper.Hash(r.Link)
-	}
-
-	return helper.Hash(r.Title + r.Description)
-}
diff --git a/reader/feed/rss/parser.go b/reader/feed/rss/parser.go
deleted file mode 100644
index a5b4434..0000000
--- a/reader/feed/rss/parser.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rss
-
-import (
- "encoding/xml"
- "io"
-
- "github.com/miniflux/miniflux2/errors"
- "github.com/miniflux/miniflux2/model"
-
- "golang.org/x/net/html/charset"
-)
-
-// Parse reads an RSS document from data and returns the normalized feed.
-// The XML decoder is given a charset reader so that documents declaring a
-// non-UTF-8 encoding can be read. A decoding failure is reported as a
-// localized error and no feed is returned.
-func Parse(data io.Reader) (*model.Feed, error) {
-	var parsed rssFeed
-
-	xmlDecoder := xml.NewDecoder(data)
-	xmlDecoder.CharsetReader = charset.NewReaderLabel
-
-	if err := xmlDecoder.Decode(&parsed); err != nil {
-		return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v.", err)
-	}
-
-	return parsed.Transform(), nil
-}
diff --git a/reader/feed/rss/parser_test.go b/reader/feed/rss/parser_test.go
deleted file mode 100644
index 9f1a557..0000000
--- a/reader/feed/rss/parser_test.go
+++ /dev/null
@@ -1,550 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rss
-
-import (
- "bytes"
- "testing"
- "time"
-
- "github.com/miniflux/miniflux2/errors"
-)
-
-func TestParseRss2Sample(t *testing.T) {
- data := `
- <?xml version="1.0"?>
- <rss version="2.0">
- <channel>
- <title>Liftoff News</title>
- <link>http://liftoff.msfc.nasa.gov/</link>
- <description>Liftoff to Space Exploration.</description>
- <language>en-us</language>
- <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
- <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
- <docs>http://blogs.law.harvard.edu/tech/rss</docs>
- <generator>Weblog Editor 2.0</generator>
- <managingEditor>editor@example.com</managingEditor>
- <webMaster>webmaster@example.com</webMaster>
- <item>
- <title>Star City</title>
- <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
- <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
- <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
- <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
- </item>
- <item>
- <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
- <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
- <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
- </item>
- <item>
- <title>The Engine That Does More</title>
- <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
- <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
- <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
- <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
- </item>
- <item>
- <title>Astronauts' Dirty Laundry</title>
- <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
- <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
- <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
- <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Title != "Liftoff News" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
- }
-
- if feed.FeedURL != "" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
-
- if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
-
- if len(feed.Entries) != 4 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
- if !feed.Entries[0].Date.Equal(expectedDate) {
- t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
- }
-
- if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
- t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
- }
-
- if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
-
- if feed.Entries[0].Title != "Star City" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-
- if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
- t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
- }
-}
-
-func TestParseFeedWithoutTitle(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0">
- <channel>
- <link>https://example.org/</link>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Title != "https://example.org/" {
- t.Errorf("Incorrect feed title, got: %s", feed.Title)
- }
-}
-
-func TestParseEntryWithoutTitle(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0">
- <channel>
- <link>https://example.org/</link>
- <item>
- <link>https://example.org/item</link>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Title != "https://example.org/item" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-}
-
-func TestParseEntryWithoutLink(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0">
- <channel>
- <link>https://example.org/</link>
- <item>
- <guid isPermaLink="false">1234</guid>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].URL != "https://example.org/" {
- t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
- }
-
- if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
- t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
- }
-}
-
-func TestParseEntryWithAtomLink(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
- <channel>
- <link>https://example.org/</link>
- <item>
- <title>Test</title>
- <atom:link href="https://example.org/item" />
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].URL != "https://example.org/item" {
- t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
- }
-}
-
-func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
- <channel>
- <link>https://example.org/</link>
- <item>
- <title>Test</title>
- <atom:link rel="payment" href="https://example.org/a" />
- <atom:link rel="http://foobar.tld" href="https://example.org/b" />
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].URL != "https://example.org/b" {
- t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
- }
-}
-
-func TestParseFeedURLWithAtomLink(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
- <channel>
- <title>Example</title>
- <link>https://example.org/</link>
- <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.FeedURL != "https://example.org/rss" {
- t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
- }
-
- if feed.SiteURL != "https://example.org/" {
- t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
- }
-}
-
-func TestParseEntryWithAtomAuthor(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
- <channel>
- <title>Example</title>
- <link>https://example.org/</link>
- <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
- <item>
- <title>Test</title>
- <link>https://example.org/item</link>
- <author xmlns:author="http://www.w3.org/2005/Atom">
- <name>Foo Bar</name>
- <title>Vice President</title>
- <department/>
- <company>FooBar Inc.</company>
- </author>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "Foo Bar" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
- <channel>
- <title>Example</title>
- <link>https://example.org/</link>
- <item>
- <title>Test</title>
- <link>https://example.org/item</link>
- <dc:creator>Me (me@example.com)</dc:creator>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "Me (me@example.com)" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseEntryWithItunesAuthor(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
- <channel>
- <title>Example</title>
- <link>https://example.org/</link>
- <item>
- <title>Test</title>
- <link>https://example.org/item</link>
- <itunes:author>Someone</itunes:author>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "Someone" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseFeedWithItunesAuthor(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
- <channel>
- <title>Example</title>
- <link>https://example.org/</link>
- <itunes:author>Someone</itunes:author>
- <item>
- <title>Test</title>
- <link>https://example.org/item</link>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Author != "Someone" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
- }
-}
-
-func TestParseEntryWithDublinCoreDate(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
- <channel>
- <title>Example</title>
- <link>http://example.org/</link>
- <item>
- <title>Item 1</title>
- <link>http://example.org/item1</link>
- <description>Description.</description>
- <guid isPermaLink="false">UUID</guid>
- <dc:date>2002-09-29T23:40:06-05:00</dc:date>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- location, _ := time.LoadLocation("EST")
- expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
- if !feed.Entries[0].Date.Equal(expectedDate) {
- t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
- }
-}
-
-func TestParseEntryWithContentEncoded(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
- <channel>
- <title>Example</title>
- <link>http://example.org/</link>
- <item>
- <title>Item 1</title>
- <link>http://example.org/item1</link>
- <description>Description.</description>
- <guid isPermaLink="false">UUID</guid>
- <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
- t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
- }
-}
-
-func TestParseEntryWithFeedBurnerLink(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
- <channel>
- <title>Example</title>
- <link>http://example.org/</link>
- <item>
- <title>Item 1</title>
- <link>http://example.org/item1</link>
- <feedburner:origLink>http://example.org/original</feedburner:origLink>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].URL != "http://example.org/original" {
- t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
- }
-}
-
-func TestParseEntryTitleWithWhitespaces(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0">
- <channel>
- <title>Example</title>
- <link>http://example.org</link>
- <item>
- <title>
- Some Title
- </title>
- <link>http://www.example.org/entries/1</link>
- <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if feed.Entries[0].Title != "Some Title" {
- t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
- }
-}
-
-func TestParseEntryWithEnclosures(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0">
- <channel>
- <title>My Podcast Feed</title>
- <link>http://example.org</link>
- <author>some.email@example.org</author>
- <item>
- <title>Podcasting with RSS</title>
- <link>http://www.example.org/entries/1</link>
- <description>An overview of RSS podcasting</description>
- <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
- <guid isPermaLink="true">http://www.example.org/entries/1</guid>
- <enclosure url="http://www.example.org/myaudiofile.mp3"
- length="12345"
- type="audio/mpeg" />
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if len(feed.Entries) != 1 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- if feed.Entries[0].URL != "http://www.example.org/entries/1" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
-
- if len(feed.Entries[0].Enclosures) != 1 {
- t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
- }
-
- if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
- t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
- }
-
- if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
- t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
- }
-
- if feed.Entries[0].Enclosures[0].Size != 12345 {
- t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
- }
-}
-
-func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
- data := `<?xml version="1.0" encoding="utf-8"?>
- <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
- <channel>
- <title>My Example Feed</title>
- <link>http://example.org</link>
- <author>some.email@example.org</author>
- <item>
- <title>Example Item</title>
- <link>http://www.example.org/entries/1</link>
- <enclosure
- url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
- length="76192460"
- type="audio/mpeg" />
- <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
- </item>
- </channel>
- </rss>`
-
- feed, err := Parse(bytes.NewBufferString(data))
- if err != nil {
- t.Error(err)
- }
-
- if len(feed.Entries) != 1 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- if feed.Entries[0].URL != "http://www.example.org/entries/1" {
- t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
- }
-
- if len(feed.Entries[0].Enclosures) != 1 {
- t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
- }
-
- if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
- t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
- }
-
- if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
- t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
- }
-
- if feed.Entries[0].Enclosures[0].Size != 76192460 {
- t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
- }
-}
-
-func TestParseInvalidXml(t *testing.T) {
- data := `garbage`
- _, err := Parse(bytes.NewBufferString(data))
- if err == nil {
- t.Error("Parse should returns an error")
- }
-
- if _, ok := err.(errors.LocalizedError); !ok {
- t.Error("The error returned must be a LocalizedError")
- }
-}
diff --git a/reader/feed/rss/rss.go b/reader/feed/rss/rss.go
deleted file mode 100644
index d685a3e..0000000
--- a/reader/feed/rss/rss.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package rss
-
-import (
- "encoding/xml"
- "log"
- "path"
- "strconv"
- "strings"
- "time"
-
- "github.com/miniflux/miniflux2/helper"
- "github.com/miniflux/miniflux2/model"
- "github.com/miniflux/miniflux2/reader/feed/date"
- "github.com/miniflux/miniflux2/reader/processor"
- "github.com/miniflux/miniflux2/reader/sanitizer"
-)
-
-type rssFeed struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"channel>title"`
- Links []rssLink `xml:"channel>link"`
- Language string `xml:"channel>language"`
- Description string `xml:"channel>description"`
- PubDate string `xml:"channel>pubDate"`
- ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
- Items []rssItem `xml:"channel>item"`
-}
-
-type rssLink struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- Href string `xml:"href,attr"`
- Rel string `xml:"rel,attr"`
-}
-
-type rssItem struct {
- GUID string `xml:"guid"`
- Title string `xml:"title"`
- Links []rssLink `xml:"link"`
- OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
- Description string `xml:"description"`
- Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
- PubDate string `xml:"pubDate"`
- Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
- Authors []rssAuthor `xml:"author"`
- Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
- Enclosures []rssEnclosure `xml:"enclosure"`
- OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
-}
-
-type rssAuthor struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- Name string `xml:"name"`
-}
-
-type rssEnclosure struct {
- URL string `xml:"url,attr"`
- Type string `xml:"type,attr"`
- Length string `xml:"length,attr"`
-}
-
-func (r *rssFeed) GetSiteURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "" {
- return element.Data
- }
- }
-
- return ""
-}
-
-func (r *rssFeed) GetFeedURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
- return element.Href
- }
- }
-
- return ""
-}
-
-func (r *rssFeed) Transform() *model.Feed {
- feed := new(model.Feed)
- feed.SiteURL = r.GetSiteURL()
- feed.FeedURL = r.GetFeedURL()
- feed.Title = sanitizer.StripTags(r.Title)
-
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- for _, item := range r.Items {
- entry := item.Transform()
-
- if entry.Author == "" && r.ItunesAuthor != "" {
- entry.Author = r.ItunesAuthor
- }
- entry.Author = sanitizer.StripTags(entry.Author)
-
- if entry.URL == "" {
- entry.URL = feed.SiteURL
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
-}
-func (r *rssItem) GetDate() time.Time {
- value := r.PubDate
- if r.Date != "" {
- value = r.Date
- }
-
- if value != "" {
- result, err := date.Parse(value)
- if err != nil {
- log.Println(err)
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (r *rssItem) GetAuthor() string {
- for _, element := range r.Authors {
- if element.Name != "" {
- return element.Name
- }
-
- if element.Data != "" {
- return element.Data
- }
- }
-
- return r.Creator
-}
-
-func (r *rssItem) GetHash() string {
- for _, value := range []string{r.GUID, r.GetURL()} {
- if value != "" {
- return helper.Hash(value)
- }
- }
-
- return ""
-}
-
-func (r *rssItem) GetContent() string {
- if r.Content != "" {
- return r.Content
- }
-
- return r.Description
-}
-
-func (r *rssItem) GetURL() string {
- if r.OriginalLink != "" {
- return r.OriginalLink
- }
-
- for _, link := range r.Links {
- if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
- return link.Href
- }
-
- if link.Data != "" {
- return link.Data
- }
- }
-
- return ""
-}
-
-func (r *rssItem) GetEnclosures() model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
-
- for _, enclosure := range r.Enclosures {
- length, _ := strconv.Atoi(enclosure.Length)
- enclosureURL := enclosure.URL
-
- if r.OrigEnclosureLink != "" {
- filename := path.Base(r.OrigEnclosureLink)
- if strings.Contains(enclosureURL, filename) {
- enclosureURL = r.OrigEnclosureLink
- }
- }
-
- enclosures = append(enclosures, &model.Enclosure{
- URL: enclosureURL,
- MimeType: enclosure.Type,
- Size: length,
- })
- }
-
- return enclosures
-}
-
-func (r *rssItem) Transform() *model.Entry {
- entry := new(model.Entry)
- entry.URL = r.GetURL()
- entry.Date = r.GetDate()
- entry.Author = r.GetAuthor()
- entry.Hash = r.GetHash()
- entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
- entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t"))
- entry.Enclosures = r.GetEnclosures()
-
- if entry.Title == "" {
- entry.Title = entry.URL
- }
-
- return entry
-}
-
-func isValidLinkRelation(rel string) bool {
- switch rel {
- case "", "alternate", "enclosure", "related", "self", "via":
- return true
- default:
- if strings.HasPrefix(rel, "http") {
- return true
- }
- return false
- }
-}