aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Frédéric Guillot <fred@miniflux.net>, 2019-12-23 14:39:54 -0800
committer: Frédéric Guillot <fred@miniflux.net>, 2019-12-23 14:45:58 -0800
commit: 200b1c304b999191a29f36d4122e7aa05481125c (patch)
tree: bd7b38bddd14a99f9c8eddf3e4860fce77d87166
parent: 1b33bb3d1cc01067fa8091b0e73a389fc189958c (diff)
Improve Dublin Core support for RDF feeds
-rw-r--r-- reader/rdf/dublincore.go | 17
-rw-r--r-- reader/rdf/parser_test.go | 33
-rw-r--r-- reader/rdf/rdf.go | 51
3 files changed, 86 insertions, 15 deletions
diff --git a/reader/rdf/dublincore.go b/reader/rdf/dublincore.go
new file mode 100644
index 0000000..f06456d
--- /dev/null
+++ b/reader/rdf/dublincore.go
@@ -0,0 +1,17 @@
+// Copyright 2019 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rdf // import "miniflux.app/reader/rdf"
+
+// DublinCoreFeedElement represents Dublin Core feed XML elements.
+// The only element mapped is <creator> in the Dublin Core 1.1 namespace
+// (http://purl.org/dc/elements/1.1/), addressed through the channel>creator path.
+type DublinCoreFeedElement struct {
+ DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
+}
+
+// DublinCoreEntryElement represents Dublin Core entry XML elements.
+// DublinCoreDate and DublinCoreCreator map <date> and <creator> in the Dublin
+// Core 1.1 namespace. Despite its name, DublinCoreContent maps <encoded> in the
+// RSS 1.0 content module namespace (http://purl.org/rss/1.0/modules/content/).
+type DublinCoreEntryElement struct {
+ DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
+ DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
+ DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
+}
diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go
index 87410f8..0958f3c 100644
--- a/reader/rdf/parser_test.go
+++ b/reader/rdf/parser_test.go
@@ -486,3 +486,36 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL)
}
}
+
+// TestParseRDFWithContentEncoded checks that the CDATA payload of an item's
+// content:encoded element is exposed as the entry content.
+func TestParseRDFWithContentEncoded(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns="http://purl.org/rss/1.0/"
+ xmlns:content="http://purl.org/rss/1.0/modules/content/">
+ <channel>
+ <title>Example Feed</title>
+ <link>http://example.org/</link>
+ </channel>
+ <item>
+ <title>Item Title</title>
+ <link>http://example.org/</link>
+ <content:encoded><![CDATA[<p>Test</p>]]></content:encoded>
+ </item>
+ </rdf:RDF>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
+ }
+
+ expected := `<p>Test</p>`
+ result := feed.Entries[0].Content
+ if result != expected {
+ // The assertion is about the entry content, so the message must say so.
+ t.Errorf(`Unexpected entry content, got %q instead of %q`, result, expected)
+ }
+}
diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go
index 7061883..df4296b 100644
--- a/reader/rdf/rdf.go
+++ b/reader/rdf/rdf.go
@@ -21,8 +21,8 @@ type rdfFeed struct {
XMLName xml.Name `xml:"RDF"`
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
- Creator string `xml:"channel>creator"`
Items []rdfItem `xml:"item"`
+ DublinCoreFeedElement
}
func (r *rdfFeed) Transform() *model.Feed {
@@ -32,9 +32,10 @@ func (r *rdfFeed) Transform() *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
- if entry.Author == "" && r.Creator != "" {
- entry.Author = sanitizer.StripTags(r.Creator)
+ if entry.Author == "" && r.DublinCoreCreator != "" {
+ entry.Author = strings.TrimSpace(r.DublinCoreCreator)
}
+ entry.Author = sanitizer.StripTags(entry.Author)
if entry.URL == "" {
entry.URL = feed.SiteURL
@@ -55,24 +56,44 @@ type rdfItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
- Creator string `xml:"creator"`
- Date string `xml:"date"`
+ DublinCoreEntryElement
}
+// Transform builds a model.Entry from the RDF item. After this change each
+// field (title, author, URL, content, hash, date) comes from the matching
+// entry* helper method instead of being computed inline.
func (r *rdfItem) Transform() *model.Entry {
entry := new(model.Entry)
- entry.Title = strings.TrimSpace(r.Title)
- entry.Author = strings.TrimSpace(r.Creator)
- entry.URL = strings.TrimSpace(r.Link)
- entry.Content = r.Description
- entry.Hash = getHash(r)
- entry.Date = getDate(r)
+ entry.Title = r.entryTitle()
+ entry.Author = r.entryAuthor()
+ entry.URL = r.entryURL()
+ entry.Content = r.entryContent()
+ entry.Hash = r.entryHash()
+ entry.Date = r.entryDate()
return entry
}
-func getDate(r *rdfItem) time.Time {
- if r.Date != "" {
- result, err := date.Parse(r.Date)
+// entryTitle returns the item title with leading and trailing whitespace removed.
+func (r *rdfItem) entryTitle() string {
+	title := r.Title
+	return strings.TrimSpace(title)
+}
+
+// entryContent returns the content:encoded payload when it is non-empty and
+// falls back to the item description otherwise.
+func (r *rdfItem) entryContent() string {
+	if encoded := r.DublinCoreContent; encoded != "" {
+		return encoded
+	}
+	return r.Description
+}
+
+// entryAuthor returns the item's creator value with leading and trailing
+// whitespace removed.
+func (r *rdfItem) entryAuthor() string {
+	creator := r.DublinCoreCreator
+	return strings.TrimSpace(creator)
+}
+
+// entryURL returns the item link with leading and trailing whitespace removed.
+func (r *rdfItem) entryURL() string {
+	link := r.Link
+	return strings.TrimSpace(link)
+}
+
+func (r *rdfItem) entryDate() time.Time {
+ if r.DublinCoreDate != "" {
+ result, err := date.Parse(r.DublinCoreDate)
if err != nil {
logger.Error("rdf: %v", err)
return time.Now()
@@ -84,7 +105,7 @@ func getDate(r *rdfItem) time.Time {
return time.Now()
}
-func getHash(r *rdfItem) string {
+func (r *rdfItem) entryHash() string {
value := r.Link
if value == "" {
value = r.Title + r.Description