From 200b1c304b999191a29f36d4122e7aa05481125c Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Mon, 23 Dec 2019 14:39:54 -0800 Subject: Improve Dublin Core support for RDF feeds --- reader/rdf/dublincore.go | 17 ++++++++++++++++ reader/rdf/parser_test.go | 33 ++++++++++++++++++++++++++++++ reader/rdf/rdf.go | 51 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 86 insertions(+), 15 deletions(-) create mode 100644 reader/rdf/dublincore.go diff --git a/reader/rdf/dublincore.go b/reader/rdf/dublincore.go new file mode 100644 index 0000000..f06456d --- /dev/null +++ b/reader/rdf/dublincore.go @@ -0,0 +1,17 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package rdf // import "miniflux.app/reader/rdf" + +// DublinCoreFeedElement represents Dublin Core feed XML elements. +type DublinCoreFeedElement struct { + DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"` +} + +// DublinCoreEntryElement represents Dublin Core entry XML elements. +type DublinCoreEntryElement struct { + DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"` + DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"` + DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` +} diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go index 87410f8..0958f3c 100644 --- a/reader/rdf/parser_test.go +++ b/reader/rdf/parser_test.go @@ -486,3 +486,36 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) { t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL) } } + +func TestParseRDFWithContentEncoded(t *testing.T) { + data := ` + + + Example Feed + http://example.org/ + + + Item Title + http://example.org/ + Test

]]>
+
+
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) + } + + expected := `

Test

` + result := feed.Entries[0].Content + if result != expected { + t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected) + } +} diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index 7061883..df4296b 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -21,8 +21,8 @@ type rdfFeed struct { XMLName xml.Name `xml:"RDF"` Title string `xml:"channel>title"` Link string `xml:"channel>link"` - Creator string `xml:"channel>creator"` Items []rdfItem `xml:"item"` + DublinCoreFeedElement } func (r *rdfFeed) Transform() *model.Feed { @@ -32,9 +32,10 @@ func (r *rdfFeed) Transform() *model.Feed { for _, item := range r.Items { entry := item.Transform() - if entry.Author == "" && r.Creator != "" { - entry.Author = sanitizer.StripTags(r.Creator) + if entry.Author == "" && r.DublinCoreCreator != "" { + entry.Author = strings.TrimSpace(r.DublinCoreCreator) } + entry.Author = sanitizer.StripTags(entry.Author) if entry.URL == "" { entry.URL = feed.SiteURL @@ -55,24 +56,44 @@ type rdfItem struct { Title string `xml:"title"` Link string `xml:"link"` Description string `xml:"description"` - Creator string `xml:"creator"` - Date string `xml:"date"` + DublinCoreEntryElement } func (r *rdfItem) Transform() *model.Entry { entry := new(model.Entry) - entry.Title = strings.TrimSpace(r.Title) - entry.Author = strings.TrimSpace(r.Creator) - entry.URL = strings.TrimSpace(r.Link) - entry.Content = r.Description - entry.Hash = getHash(r) - entry.Date = getDate(r) + entry.Title = r.entryTitle() + entry.Author = r.entryAuthor() + entry.URL = r.entryURL() + entry.Content = r.entryContent() + entry.Hash = r.entryHash() + entry.Date = r.entryDate() return entry } -func getDate(r *rdfItem) time.Time { - if r.Date != "" { - result, err := date.Parse(r.Date) +func (r *rdfItem) entryTitle() string { + return strings.TrimSpace(r.Title) +} + +func (r *rdfItem) entryContent() string { + switch { + case r.DublinCoreContent != "": + return r.DublinCoreContent + default: + return r.Description + } +} + +func (r *rdfItem) entryAuthor() string { + return strings.TrimSpace(r.DublinCoreCreator) +} + +func (r *rdfItem) entryURL() string { + return strings.TrimSpace(r.Link) +} + +func (r *rdfItem) entryDate() time.Time { + if r.DublinCoreDate != "" { + result, err := date.Parse(r.DublinCoreDate) if err != nil { logger.Error("rdf: %v", err) return time.Now() @@ -84,7 +105,7 @@ func getDate(r *rdfItem) time.Time { return time.Now() } -func getHash(r *rdfItem) string { +func (r *rdfItem) entryHash() string { value := r.Link if value == "" { value = r.Title + r.Description -- cgit v1.2.3