aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/json
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-11-20 19:17:04 -0800
commitd5838b67340ca83dcc32f6d1775c183188ec5e7a (patch)
treea71705463458e5d2a6f74b0323119c3376f89b97 /reader/json
parentc26787f47667f69a2d3e85db01fe20b9bc70bc9a (diff)
Move feed parsers packages in reader package
Diffstat (limited to 'reader/json')
-rw-r--r--reader/json/json.go171
-rw-r--r--reader/json/parser.go24
-rw-r--r--reader/json/parser_test.go359
3 files changed, 554 insertions, 0 deletions
diff --git a/reader/json/json.go b/reader/json/json.go
new file mode 100644
index 0000000..cd6a1c8
--- /dev/null
+++ b/reader/json/json.go
@@ -0,0 +1,171 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "log"
+ "strings"
+ "time"
+
+ "github.com/miniflux/miniflux2/helper"
+ "github.com/miniflux/miniflux2/model"
+ "github.com/miniflux/miniflux2/reader/date"
+ "github.com/miniflux/miniflux2/reader/processor"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+)
+
+// jsonFeed is the top-level object of a JSON feed document as decoded by
+// Parse. Each field is filled from the key named in its struct tag.
+type jsonFeed struct {
+ Version string `json:"version"`
+ Title string `json:"title"`
+ // SiteURL and FeedURL are copied unchanged into the normalized feed by
+ // Transform; SiteURL also serves as the title when Title is empty.
+ SiteURL string `json:"home_page_url"`
+ FeedURL string `json:"feed_url"`
+ // Author is the fallback for items that do not carry their own author.
+ Author jsonAuthor `json:"author"`
+ Items []jsonItem `json:"items"`
+}
+
+// jsonAuthor is the "author" object attached to a feed or to a single item.
+type jsonAuthor struct {
+ Name string `json:"name"`
+ // URL is decoded but not read by any code in this file.
+ URL string `json:"url"`
+}
+
+// jsonItem is one element of the feed's "items" array.
+type jsonItem struct {
+ // ID and URL are the preferred inputs for the entry hash (see GetHash).
+ ID string `json:"id"`
+ URL string `json:"url"`
+ Title string `json:"title"`
+ // Summary, Text and HTML are alternative bodies of the item; GetContent
+ // and GetTitle each pick the first non-empty one in their own order.
+ Summary string `json:"summary"`
+ Text string `json:"content_text"`
+ HTML string `json:"content_html"`
+ // DatePublished and DateModified hold the raw date strings; GetDate
+ // parses them.
+ DatePublished string `json:"date_published"`
+ DateModified string `json:"date_modified"`
+ Author jsonAuthor `json:"author"`
+ Attachments []jsonAttachment `json:"attachments"`
+}
+
+// jsonAttachment is one element of an item's "attachments" array.
+// GetEnclosures copies URL, MimeType and Size into the enclosure model;
+// Title and Duration are decoded but not used by it.
+type jsonAttachment struct {
+ URL string `json:"url"`
+ MimeType string `json:"mime_type"`
+ Title string `json:"title"`
+ Size int `json:"size_in_bytes"`
+ Duration int `json:"duration_in_seconds"`
+}
+
+// GetAuthor returns the name of the feed-level author, or an empty string
+// when the feed does not declare one.
+func (j *jsonFeed) GetAuthor() string {
+	feedAuthor := j.Author
+	return getAuthor(feedAuthor)
+}
+
+// Transform converts the decoded JSON feed into the normalized feed model.
+//
+// The title is passed through sanitizer.StripTags and replaced by the site
+// URL when that leaves it empty. Every item is converted with its own
+// Transform; entries that end up without an author inherit the feed author.
+func (j *jsonFeed) Transform() *model.Feed {
+	title := sanitizer.StripTags(j.Title)
+	if title == "" {
+		title = j.SiteURL
+	}
+
+	result := &model.Feed{
+		FeedURL: j.FeedURL,
+		SiteURL: j.SiteURL,
+		Title:   title,
+	}
+
+	for i := range j.Items {
+		entry := j.Items[i].Transform()
+		if entry.Author == "" {
+			entry.Author = j.GetAuthor()
+		}
+
+		result.Entries = append(result.Entries, entry)
+	}
+
+	return result
+}
+
+// GetDate returns the date of the item, trying date_published first and
+// date_modified second.
+//
+// A value rejected by date.Parse is logged and skipped, so a malformed
+// date_published no longer hides a usable date_modified. When neither field
+// yields a date, the current time is returned.
+func (j *jsonItem) GetDate() time.Time {
+	for _, value := range []string{j.DatePublished, j.DateModified} {
+		if value == "" {
+			continue
+		}
+
+		d, err := date.Parse(value)
+		if err != nil {
+			// Keep going instead of giving up: the next candidate may
+			// still be parsable.
+			log.Println(err)
+			continue
+		}
+
+		return d
+	}
+
+	return time.Now()
+}
+
+// GetAuthor returns the name of the item's own author, or an empty string
+// when the item does not declare one.
+func (j *jsonItem) GetAuthor() string {
+	itemAuthor := j.Author
+	return getAuthor(itemAuthor)
+}
+
+// GetHash returns helper.Hash of the first non-empty identity source of the
+// item: its ID, then its URL, then the concatenation of content_text,
+// content_html and summary. An item with none of these gets an empty hash.
+func (j *jsonItem) GetHash() string {
+	switch {
+	case j.ID != "":
+		return helper.Hash(j.ID)
+	case j.URL != "":
+		return helper.Hash(j.URL)
+	}
+
+	if body := j.Text + j.HTML + j.Summary; body != "" {
+		return helper.Hash(body)
+	}
+
+	return ""
+}
+
+// GetTitle returns a title for the item. The first non-empty value among
+// title, summary, content_text and content_html is used, shortened by
+// truncate. When all of them are empty the item URL is returned as-is.
+func (j *jsonItem) GetTitle() string {
+	switch {
+	case j.Title != "":
+		return truncate(j.Title)
+	case j.Summary != "":
+		return truncate(j.Summary)
+	case j.Text != "":
+		return truncate(j.Text)
+	case j.HTML != "":
+		return truncate(j.HTML)
+	default:
+		return j.URL
+	}
+}
+
+// GetContent returns the body of the item: content_html when present,
+// otherwise content_text, otherwise the summary (which may be empty).
+func (j *jsonItem) GetContent() string {
+	switch {
+	case j.HTML != "":
+		return j.HTML
+	case j.Text != "":
+		return j.Text
+	default:
+		// Covers both a non-empty summary and the "nothing at all" case.
+		return j.Summary
+	}
+}
+
+// GetEnclosures converts the item's attachments into enclosures, copying the
+// URL, MIME type and size of each one. The returned list is never nil, even
+// when the item has no attachments.
+func (j *jsonItem) GetEnclosures() model.EnclosureList {
+	// The final length is known up front, so reserve it once instead of
+	// letting append grow the slice step by step.
+	enclosures := make(model.EnclosureList, 0, len(j.Attachments))
+
+	for i := range j.Attachments {
+		attachment := &j.Attachments[i]
+		enclosures = append(enclosures, &model.Enclosure{
+			URL:      attachment.URL,
+			MimeType: attachment.MimeType,
+			Size:     attachment.Size,
+		})
+	}
+
+	return enclosures
+}
+
+// Transform converts the decoded item into a normalized entry.
+//
+// Author and title are passed through sanitizer.StripTags (the title is
+// first trimmed of surrounding spaces, newlines and tabs), and the content
+// goes through processor.ItemContentProcessor together with the item URL.
+func (j *jsonItem) Transform() *model.Entry {
+	// Fields are listed in the order they must be evaluated.
+	return &model.Entry{
+		URL:        j.URL,
+		Date:       j.GetDate(),
+		Author:     sanitizer.StripTags(j.GetAuthor()),
+		Hash:       j.GetHash(),
+		Content:    processor.ItemContentProcessor(j.URL, j.GetContent()),
+		Title:      sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t")),
+		Enclosures: j.GetEnclosures(),
+	}
+}
+
+// getAuthor returns the display name of the given author, which is the empty
+// string when no name was provided.
+func getAuthor(author jsonAuthor) string {
+	return author.Name
+}
+
+func truncate(str string) string {
+ max := 100
+ if len(str) > max {
+ return str[:max] + "..."
+ }
+
+ return str
+}
diff --git a/reader/json/parser.go b/reader/json/parser.go
new file mode 100644
index 0000000..18329e7
--- /dev/null
+++ b/reader/json/parser.go
@@ -0,0 +1,24 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "encoding/json"
+ "io"
+
+ "github.com/miniflux/miniflux2/errors"
+ "github.com/miniflux/miniflux2/model"
+)
+
+// Parse returns a normalized feed struct from a JON feed.
+func Parse(data io.Reader) (*model.Feed, error) {
+ feed := new(jsonFeed)
+ decoder := json.NewDecoder(data)
+ if err := decoder.Decode(&feed); err != nil {
+ return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err)
+ }
+
+ return feed.Transform(), nil
+}
diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go
new file mode 100644
index 0000000..ecb11a1
--- /dev/null
+++ b/reader/json/parser_test.go
@@ -0,0 +1,359 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/miniflux/miniflux2/errors"
+)
+
+// TestParseJsonFeed parses a minimal two-item feed and checks the feed
+// metadata as well as the hash, URL, title and content of both entries.
+func TestParseJsonFeed(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2",
+				"content_text": "This is a second item.",
+				"url": "https://example.org/second-item"
+			},
+			{
+				"id": "1",
+				"content_html": "<p>Hello, world!</p>",
+				"url": "https://example.org/initial-post"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "My Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "https://example.org/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "https://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 2 {
+		// The assertions below index both entries.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "https://example.org/second-item" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "This is a second item." {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != "This is a second item." {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+
+	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
+	}
+
+	if feed.Entries[1].URL != "https://example.org/initial-post" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
+	}
+
+	if feed.Entries[1].Title != "Hello, world!" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
+	}
+
+	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
+	}
+}
+
+// TestParsePodcast parses a podcast feed whose single item carries an audio
+// attachment and checks the entry fields, its date and the resulting
+// enclosure.
+func TestParsePodcast(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
+		"title": "The Record",
+		"home_page_url": "http://therecord.co/",
+		"feed_url": "http://therecord.co/feed.json",
+		"items": [
+			{
+				"id": "http://therecord.co/chris-parrish",
+				"title": "Special #1 - Chris Parrish",
+				"url": "http://therecord.co/chris-parrish",
+				"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
+				"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
+				"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
+				"date_published": "2014-05-09T14:04:00-07:00",
+				"attachments": [
+					{
+						"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
+						"mime_type": "audio/x-m4a",
+						"size_in_bytes": 89970236,
+						"duration_in_seconds": 6629
+					}
+				]
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "The Record" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "http://therecord.co/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "http://therecord.co/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertions below index the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
+		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
+	}
+
+	// The fixture states its offset explicitly (-07:00), so build the
+	// expected instant from a fixed zone. This avoids depending on the
+	// host's time zone database: a failed lookup would hand a nil location
+	// to time.Date, which panics.
+	location := time.FixedZone("", -7*60*60)
+	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		// The assertions below index the first enclosure.
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 89970236 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+}
+
+// TestParseAuthor checks that an item without its own author inherits the
+// author declared at feed level.
+func TestParseAuthor(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
+		"title": "Brent Simmons’s Microblog",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"author": {
+			"name": "Brent Simmons",
+			"url": "http://example.org/",
+			"avatar": "https://example.org/avatar.png"
+		},
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "2016-02-09T14:22:00-07:00"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Brent Simmons" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseFeedWithoutTitle checks that a feed without a title uses its
+// home page URL as title.
+func TestParseFeedWithoutTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "2016-02-09T14:22:00-07:00"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "https://example.org/" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseFeedItemWithInvalidDate checks that an item with an unparsable
+// date is still imported and gets a date that is not in the future.
+func TestParseFeedItemWithInvalidDate(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "Tomorrow"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	// Only a date in the future is wrong. Two clock readings taken close
+	// together may be equal, so a strict "before now" check would fail
+	// spuriously.
+	if feed.Entries[0].Date.After(time.Now()) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+}
+
+// TestParseFeedItemWithoutID checks that an item with neither ID nor URL is
+// hashed from its content.
+func TestParseFeedItemWithoutID(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"content_text": "Some text."
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+}
+
+// TestParseFeedItemWithoutTitle checks that an item without title or content
+// uses its URL as title.
+func TestParseFeedItemWithoutTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"url": "https://example.org/item"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "https://example.org/item" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseTruncateItemTitle checks that a 200-character ASCII title is cut
+// down to 100 characters followed by "...".
+func TestParseTruncateItemTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"title": "` + strings.Repeat("a", 200) + `"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil here; continuing would panic instead of failing.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The assertion below indexes the first entry.
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Title) != 103 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseInvalidJSON checks that input which is not JSON is rejected with
+// a LocalizedError.
+func TestParseInvalidJSON(t *testing.T) {
+	data := `garbage`
+	_, err := Parse(bytes.NewBufferString(data))
+	if err == nil {
+		// Stop here: checking the type of a nil error would only add a
+		// second, misleading failure.
+		t.Fatal("Parse should return an error")
+	}
+
+	if _, ok := err.(errors.LocalizedError); !ok {
+		t.Error("The error returned must be a LocalizedError")
+	}
+}