diff options
author | Frédéric Guillot <fred@miniflux.net> | 2018-02-17 12:21:58 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2018-02-17 12:21:58 -0800 |
commit | 9292d5d6049de7a876c5428039c63e7c7720452b (patch) | |
tree | afced42a58a6fc9170fdd92f83ef13f5cdb316ca | |
parent | 3ba280e10c4b876331b964c93d720ad50d9f99d7 (diff) |
Handle Atom feeds with HTML title
-rw-r--r-- | reader/atom/atom.go | 16 | ||||
-rw-r--r-- | reader/atom/parser_test.go | 78 |
2 files changed, 92 insertions, 2 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go index 68a3903..4697125 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -14,6 +14,7 @@ import ( "github.com/miniflux/miniflux/logger" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" + "github.com/miniflux/miniflux/reader/sanitizer" "github.com/miniflux/miniflux/url" ) @@ -28,7 +29,7 @@ type atomFeed struct { type atomEntry struct { ID string `xml:"id"` - Title string `xml:"title"` + Title atomContent `xml:"title"` Updated string `xml:"updated"` Links []atomLink `xml:"link"` Summary string `xml:"summary"` @@ -97,7 +98,7 @@ func (a *atomEntry) Transform() *model.Entry { entry.Author = getAuthor(a.Author) entry.Hash = getHash(a) entry.Content = getContent(a) - entry.Title = strings.TrimSpace(a.Title) + entry.Title = getTitle(a) entry.Enclosures = getEnclosures(a) return entry } @@ -160,6 +161,17 @@ func getContent(a *atomEntry) string { return "" } +func getTitle(a *atomEntry) string { + title := "" + if a.Title.Type == "xhtml" { + title = a.Title.XML + } else { + title = a.Title.Data + } + + return strings.TrimSpace(sanitizer.StripTags(title)) +} + func getHash(a *atomEntry) string { for _, value := range []string{a.ID, getURL(a.Links)} { if value != "" { diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index be07383..ec9186c 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -206,6 +206,84 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) { } } +func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><![CDATA[Test “Test”]]></title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> 
+ + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test “Test”" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithXHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="xhtml"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary>Some text.</summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + func TestParseEntryWithAuthorName(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> |