Move feed parsers packages in reader package

author: Frédéric Guillot <fred@miniflux.net> 2017-11-20 19:17:04 -0800
committer: Frédéric Guillot <fred@miniflux.net> 2017-11-20 19:17:04 -0800
commit: d5838b67340ca83dcc32f6d1775c183188ec5e7a (patch)
tree: a71705463458e5d2a6f74b0323119c3376f89b97 /reader/json
parent: c26787f47667f69a2d3e85db01fe20b9bc70bc9a (diff)
3 files changed, 554 insertions, 0 deletions
diff --git a/reader/json/json.go b/reader/json/json.go
new file mode 100644
index 0000000..cd6a1c8
--- /dev/null
+++ b/reader/json/json.go
@@ -0,0 +1,171 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"log"
+	"strings"
+	"time"
+
+	"github.com/miniflux/miniflux2/helper"
+	"github.com/miniflux/miniflux2/model"
+	"github.com/miniflux/miniflux2/reader/date"
+	"github.com/miniflux/miniflux2/reader/processor"
+	"github.com/miniflux/miniflux2/reader/sanitizer"
+)
+
+// jsonFeed is the decoding target for the top-level object of a JSON feed
+// document; the struct tags name the JSON key each field is read from.
+type jsonFeed struct {
+	Version string     `json:"version"`
+	Title   string     `json:"title"`
+	SiteURL string     `json:"home_page_url"`
+	FeedURL string     `json:"feed_url"`
+	Author  jsonAuthor `json:"author"` // feed-level author; Transform applies it to items without one
+	Items   []jsonItem `json:"items"`
+}
+
+// jsonAuthor holds the "author" object attached to a feed or to an item.
+type jsonAuthor struct {
+	Name string `json:"name"`
+	URL  string `json:"url"` // decoded, but not read by getAuthor
+}
+
+// jsonItem is the decoding target for one element of the feed's "items"
+// array. Its Get* methods pick among these fields when a preferred one is
+// empty.
+type jsonItem struct {
+	ID            string           `json:"id"`
+	URL           string           `json:"url"`
+	Title         string           `json:"title"`
+	Summary       string           `json:"summary"`
+	Text          string           `json:"content_text"`
+	HTML          string           `json:"content_html"`
+	DatePublished string           `json:"date_published"` // kept as text; parsed by GetDate
+	DateModified  string           `json:"date_modified"`  // kept as text; parsed by GetDate
+	Author        jsonAuthor       `json:"author"`
+	Attachments   []jsonAttachment `json:"attachments"`
+}
+
+// jsonAttachment is the decoding target for one element of an item's
+// "attachments" array. GetEnclosures copies URL, MimeType and Size; Title and
+// Duration are decoded but not copied there.
+type jsonAttachment struct {
+	URL      string `json:"url"`
+	MimeType string `json:"mime_type"`
+	Title    string `json:"title"`
+	Size     int    `json:"size_in_bytes"`
+	Duration int    `json:"duration_in_seconds"`
+}
+
+// GetAuthor returns the name of the feed-level author, or "" when the feed
+// declares no author name.
+func (j *jsonFeed) GetAuthor() string {
+	return j.Author.Name
+}
+
+// Transform converts the decoded JSON feed into a normalized model.Feed.
+//
+// The feed title has its tags stripped; if nothing is left, the site URL is
+// used as the title. Every item is converted with jsonItem.Transform, and an
+// entry that ends up without an author inherits the feed-level author.
+func (j *jsonFeed) Transform() *model.Feed {
+	result := &model.Feed{
+		FeedURL: j.FeedURL,
+		SiteURL: j.SiteURL,
+		Title:   sanitizer.StripTags(j.Title),
+	}
+	if result.Title == "" {
+		result.Title = result.SiteURL
+	}
+
+	for i := range j.Items {
+		entry := j.Items[i].Transform()
+		if entry.Author == "" {
+			// NOTE(review): unlike the item author, this fallback is not
+			// passed through sanitizer.StripTags - confirm that is intended.
+			entry.Author = j.GetAuthor()
+		}
+		result.Entries = append(result.Entries, entry)
+	}
+
+	return result
+}
+
+// GetDate returns the item's date, trying date_published first and then
+// date_modified.
+//
+// Empty fields are skipped. A field that fails to parse is logged and the
+// next candidate is tried, so a malformed date_published does not hide a
+// valid date_modified. When no field yields a date, the current time is
+// returned.
+func (j *jsonItem) GetDate() time.Time {
+	for _, value := range []string{j.DatePublished, j.DateModified} {
+		if value == "" {
+			continue
+		}
+
+		d, err := date.Parse(value)
+		if err != nil {
+			// Best effort: report the bad value and fall through.
+			log.Println(err)
+			continue
+		}
+
+		return d
+	}
+
+	return time.Now()
+}
+
+// GetAuthor returns the name of the item's own author, or "" when the item
+// declares no author name.
+func (j *jsonItem) GetAuthor() string {
+	name := getAuthor(j.Author)
+	return name
+}
+
+// GetHash returns helper.Hash of the first non-empty value among the item
+// ID, the item URL, and the concatenation of content_text, content_html and
+// summary. It returns "" when all of them are empty.
+func (j *jsonItem) GetHash() string {
+	switch {
+	case j.ID != "":
+		return helper.Hash(j.ID)
+	case j.URL != "":
+		return helper.Hash(j.URL)
+	}
+
+	if body := j.Text + j.HTML + j.Summary; body != "" {
+		return helper.Hash(body)
+	}
+
+	return ""
+}
+
+// GetTitle returns a title for the item: the first non-empty value among
+// title, summary, content_text and content_html, shortened by truncate. When
+// all four are empty the item URL is returned as-is.
+func (j *jsonItem) GetTitle() string {
+	switch {
+	case j.Title != "":
+		return truncate(j.Title)
+	case j.Summary != "":
+		return truncate(j.Summary)
+	case j.Text != "":
+		return truncate(j.Text)
+	case j.HTML != "":
+		return truncate(j.HTML)
+	}
+
+	return j.URL
+}
+
+// GetContent returns the item body, preferring content_html, then
+// content_text, then summary. It returns "" when all three are empty.
+func (j *jsonItem) GetContent() string {
+	switch {
+	case j.HTML != "":
+		return j.HTML
+	case j.Text != "":
+		return j.Text
+	default:
+		return j.Summary
+	}
+}
+
+// GetEnclosures converts the item's attachments into a model.EnclosureList,
+// copying the URL, MIME type and size of each one. The returned list is
+// empty, not nil, when the item has no attachments.
+func (j *jsonItem) GetEnclosures() model.EnclosureList {
+	list := make(model.EnclosureList, 0, len(j.Attachments))
+
+	for i := range j.Attachments {
+		att := &j.Attachments[i]
+		list = append(list, &model.Enclosure{
+			URL:      att.URL,
+			MimeType: att.MimeType,
+			Size:     att.Size,
+		})
+	}
+
+	return list
+}
+
+// Transform converts the decoded item into a model.Entry.
+//
+// The author and title have their tags stripped, the title is additionally
+// trimmed of surrounding spaces, newlines and tabs, and the content is passed
+// through processor.ItemContentProcessor together with the item URL.
+func (j *jsonItem) Transform() *model.Entry {
+	// Fields are listed in the order the getters must run: calls inside a
+	// composite literal are evaluated left to right.
+	return &model.Entry{
+		URL:        j.URL,
+		Date:       j.GetDate(),
+		Author:     sanitizer.StripTags(j.GetAuthor()),
+		Hash:       j.GetHash(),
+		Content:    processor.ItemContentProcessor(j.URL, j.GetContent()),
+		Title:      sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t")),
+		Enclosures: j.GetEnclosures(),
+	}
+}
+
+// getAuthor returns the author's name; an author without a name yields "".
+func getAuthor(author jsonAuthor) string {
+	return author.Name
+}
+
+// truncate shortens str to at most 100 characters (runes) and appends "..."
+// when something was cut off; shorter strings are returned unchanged.
+//
+// The cut is made on a rune boundary, so a multi-byte UTF-8 character is
+// never split in half.
+func truncate(str string) string {
+	const maxRunes = 100
+
+	// Ranging over a string yields the byte offset of each rune start, so
+	// the offset seen once maxRunes runes have been counted is a safe cut.
+	count := 0
+	for offset := range str {
+		if count == maxRunes {
+			return str[:offset] + "..."
+		}
+		count++
+	}
+
+	return str
+}
diff --git a/reader/json/parser.go b/reader/json/parser.go
new file mode 100644
index 0000000..18329e7
--- /dev/null
+++ b/reader/json/parser.go
@@ -0,0 +1,24 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"encoding/json"
+	"io"
+
+	"github.com/miniflux/miniflux2/errors"
+	"github.com/miniflux/miniflux2/model"
+)
+
+// Parse returns a normalized feed struct from a JSON feed.
+//
+// It decodes one JSON document from data and converts it with
+// jsonFeed.Transform. A decoding failure is reported as a localized error and
+// no feed is returned.
+func Parse(data io.Reader) (*model.Feed, error) {
+	// Decode into a struct value, not through a pointer to a pointer: with a
+	// **jsonFeed target a JSON "null" document sets the inner pointer to nil,
+	// and the Transform call below would then dereference it and panic.
+	var feed jsonFeed
+	if err := json.NewDecoder(data).Decode(&feed); err != nil {
+		return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err)
+	}
+
+	return feed.Transform(), nil
+}
diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go
new file mode 100644
index 0000000..ecb11a1
--- /dev/null
+++ b/reader/json/parser_test.go
@@ -0,0 +1,359 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/miniflux/miniflux2/errors"
+)
+
+// TestParseJsonFeed parses a minimal two-item feed and checks the feed
+// metadata plus the hash, URL, title and content of both entries.
+func TestParseJsonFeed(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2",
+				"content_text": "This is a second item.",
+				"url": "https://example.org/second-item"
+			},
+			{
+				"id": "1",
+				"content_html": "<p>Hello, world!</p>",
+				"url": "https://example.org/initial-post"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "My Example Feed" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "https://example.org/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "https://example.org/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 2 {
+		// The checks below index Entries[0] and Entries[1].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "https://example.org/second-item" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "This is a second item." {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != "This is a second item." {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+	}
+
+	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
+	}
+
+	if feed.Entries[1].URL != "https://example.org/initial-post" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
+	}
+
+	if feed.Entries[1].Title != "Hello, world!" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
+	}
+
+	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
+		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
+	}
+}
+
+// TestParsePodcast parses a one-item podcast feed and checks the feed
+// metadata, the entry fields (including the processed HTML content and the
+// published date) and the enclosure built from the attachment.
+func TestParsePodcast(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
+		"title": "The Record",
+		"home_page_url": "http://therecord.co/",
+		"feed_url": "http://therecord.co/feed.json",
+		"items": [
+			{
+				"id": "http://therecord.co/chris-parrish",
+				"title": "Special #1 - Chris Parrish",
+				"url": "http://therecord.co/chris-parrish",
+				"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
+				"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
+				"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
+				"date_published": "2014-05-09T14:04:00-07:00",
+				"attachments": [
+					{
+						"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
+						"mime_type": "audio/x-m4a",
+						"size_in_bytes": 89970236,
+						"duration_in_seconds": 6629
+					}
+				]
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "The Record" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+
+	if feed.FeedURL != "http://therecord.co/feed.json" {
+		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
+	}
+
+	if feed.SiteURL != "http://therecord.co/" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The checks below index Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+
+	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
+		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
+	}
+
+	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
+		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
+	}
+
+	// A fixed -07:00 zone matches the offset written in date_published and
+	// does not depend on a time zone database being installed on the host.
+	location := time.FixedZone("PDT", -7*60*60)
+	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		// The checks below index Enclosures[0].
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 89970236 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+}
+
+// TestParseAuthor checks that an item without its own author inherits the
+// name of the feed-level author.
+func TestParseAuthor(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
+		"title": "Brent Simmons’s Microblog",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"author": {
+			"name": "Brent Simmons",
+			"url": "http://example.org/",
+			"avatar": "https://example.org/avatar.png"
+		},
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "2016-02-09T14:22:00-07:00"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Author != "Brent Simmons" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
+// TestParseFeedWithoutTitle checks that a feed with no title falls back to
+// its home page URL as the title.
+func TestParseFeedWithoutTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "2016-02-09T14:22:00-07:00"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if feed.Title != "https://example.org/" {
+		t.Errorf("Incorrect title, got: %s", feed.Title)
+	}
+}
+
+// TestParseFeedItemWithInvalidDate checks that an item whose date cannot be
+// parsed is still returned, with a date that does not lie in the future.
+func TestParseFeedItemWithInvalidDate(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2347259",
+				"url": "https://example.org/2347259",
+				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
+				"date_published": "Tomorrow"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	// The fallback date is taken from the clock during Parse. On a coarse
+	// clock it can equal the reading taken here, so only a later date is
+	// rejected.
+	if feed.Entries[0].Date.After(time.Now()) {
+		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
+	}
+}
+
+// TestParseFeedItemWithoutID checks that an item with neither id nor url
+// still gets a hash, derived from its content.
+func TestParseFeedItemWithoutID(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"content_text": "Some text."
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
+		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
+	}
+}
+
+// TestParseFeedItemWithoutTitle checks that an item with no title, summary
+// or content uses its URL as the title.
+func TestParseFeedItemWithoutTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"url": "https://example.org/item"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "https://example.org/item" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseTruncateItemTitle checks that a 200-character item title is cut
+// down to 103 bytes: 100 characters plus the "..." suffix.
+func TestParseTruncateItemTitle(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"title": "` + strings.Repeat("a", 200) + `"
+			}
+		]
+	}`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// feed is nil on error; stop before it is dereferenced.
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		// The check below indexes Entries[0].
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Title) != 103 {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
+// TestParseInvalidJSON checks that input that is not JSON makes Parse return
+// an error, and that the error is an errors.LocalizedError.
+func TestParseInvalidJSON(t *testing.T) {
+	_, parseErr := Parse(bytes.NewBufferString(`garbage`))
+	if parseErr == nil {
+		t.Error("Parse should returns an error")
+	}
+
+	_, isLocalized := parseErr.(errors.LocalizedError)
+	if !isLocalized {
+		t.Error("The error returned must be a LocalizedError")
+	}
+}
author	Frédéric Guillot <fred@miniflux.net>	2017-11-20 19:17:04 -0800
committer	Frédéric Guillot <fred@miniflux.net>	2017-11-20 19:17:04 -0800
commit	d5838b67340ca83dcc32f6d1775c183188ec5e7a (patch)
tree	a71705463458e5d2a6f74b0323119c3376f89b97 /reader/json
parent	c26787f47667f69a2d3e85db01fe20b9bc70bc9a (diff)