aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Frédéric Guillot <fred@miniflux.net> 2018-02-17 12:21:58 -0800
committer Frédéric Guillot <fred@miniflux.net> 2018-02-17 12:21:58 -0800
commit 9292d5d6049de7a876c5428039c63e7c7720452b (patch)
tree afced42a58a6fc9170fdd92f83ef13f5cdb316ca
parent 3ba280e10c4b876331b964c93d720ad50d9f99d7 (diff)
Handle Atom feeds with HTML title
-rw-r--r-- reader/atom/atom.go 16
-rw-r--r-- reader/atom/parser_test.go 78
2 files changed, 92 insertions, 2 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go
index 68a3903..4697125 100644
--- a/reader/atom/atom.go
+++ b/reader/atom/atom.go
@@ -14,6 +14,7 @@ import (
"github.com/miniflux/miniflux/logger"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
+ "github.com/miniflux/miniflux/reader/sanitizer"
"github.com/miniflux/miniflux/url"
)
@@ -28,7 +29,7 @@ type atomFeed struct {
type atomEntry struct {
ID string `xml:"id"`
- Title string `xml:"title"`
+ Title atomContent `xml:"title"`
Updated string `xml:"updated"`
Links []atomLink `xml:"link"`
Summary string `xml:"summary"`
@@ -97,7 +98,7 @@ func (a *atomEntry) Transform() *model.Entry {
entry.Author = getAuthor(a.Author)
entry.Hash = getHash(a)
entry.Content = getContent(a)
- entry.Title = strings.TrimSpace(a.Title)
+ entry.Title = getTitle(a)
entry.Enclosures = getEnclosures(a)
return entry
}
@@ -160,6 +161,17 @@ func getContent(a *atomEntry) string {
return ""
}
+func getTitle(a *atomEntry) string {
+ title := ""
+ if a.Title.Type == "xhtml" {
+ title = a.Title.XML
+ } else {
+ title = a.Title.Data
+ }
+
+ return strings.TrimSpace(sanitizer.StripTags(title))
+}
+
func getHash(a *atomEntry) string {
for _, value := range []string{a.ID, getURL(a.Links)} {
if value != "" {
diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go
index be07383..ec9186c 100644
--- a/reader/atom/parser_test.go
+++ b/reader/atom/parser_test.go
@@ -206,6 +206,84 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
}
}
+// TestParseEntryTitleWithHTMLAndCDATA parses a feed whose entry title is
+// declared type="html" and wrapped in a CDATA section, and expects the
+// numeric character references in it to come out as curly quotes.
+func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="html"><![CDATA[Test &#8220;Test&#8221;]]></title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// Abort: the assertions below dereference feed.
+		t.Fatal(err)
+	}
+
+	// Guard the index below so a missing entry fails cleanly instead of panicking.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "Test “Test”" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryTitleWithHTML parses a feed whose entry title is declared
+// type="html" and carries entity-escaped markup, and expects the markup to
+// be removed from the resulting title.
+func TestParseEntryTitleWithHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// Abort: the assertions below dereference feed.
+		t.Fatal(err)
+	}
+
+	// Guard the index below so a missing entry fails cleanly instead of panicking.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "Test Test" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryTitleWithXHTML parses a feed whose entry title is declared
+// type="xhtml" and contains inline child elements, and expects the markup
+// to be removed from the resulting title.
+func TestParseEntryTitleWithXHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="xhtml"><code>Test</code> Test</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+ </feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		// Abort: the assertions below dereference feed.
+		t.Fatal(err)
+	}
+
+	// Guard the index below so a missing entry fails cleanly instead of panicking.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Title != "Test Test" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
func TestParseEntryWithAuthorName(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">