aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader
diff options
context:
space:
mode:
author Peter De Wachter <pdewacht@gmail.com> 2019-01-01 22:01:19 +0100
committer fguillot <fred@miniflux.net> 2019-01-07 17:55:02 -0800
commit 0cdcec10caffab74ff742e7bc8379a956357d73f (patch)
tree 11401b13928c383f32f1525d8cdd7f6c09b1a2b0 /reader
parent 15505ee4a2bd4963d0cbc9d1820e9be641b221ca (diff)
More robust Atom text handling
Miniflux couldn't deal with XHTML Summary elements. - Make Summary an 'atomContent' field - Define an atomContentToString function rather than inlining it three times - Also properly escape special characters in plain text fields.
Diffstat (limited to 'reader')
-rw-r--r-- reader/atom/atom.go 39
-rw-r--r-- reader/atom/parser_test.go 78
2 files changed, 102 insertions, 15 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go
index 7fbf040..28d4e15 100644
--- a/reader/atom/atom.go
+++ b/reader/atom/atom.go
@@ -6,6 +6,7 @@ package atom // import "miniflux.app/reader/atom"
import (
"encoding/xml"
+ "html"
"strconv"
"strings"
"time"
@@ -33,7 +34,7 @@ type atomEntry struct {
Published string `xml:"published"`
Updated string `xml:"updated"`
Links []atomLink `xml:"link"`
- Summary string `xml:"summary"`
+ Summary atomContent `xml:"summary"`
Content atomContent `xml:"content"`
MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
Author atomAuthor `xml:"author"`
@@ -147,17 +148,31 @@ func getDate(a *atomEntry) time.Time {
return time.Now()
}
-func getContent(a *atomEntry) string {
- if a.Content.Type == "html" || a.Content.Type == "text" {
- return a.Content.Data
+func atomContentToString(c atomContent) string {
+ if c.Type == "xhtml" {
+ return c.XML
}
- if a.Content.Type == "xhtml" {
- return a.Content.XML
+ if c.Type == "html" {
+ return c.Data
}
- if a.Summary != "" {
- return a.Summary
+ if c.Type == "text" || c.Type == "" {
+ return html.EscapeString(c.Data)
+ }
+
+ return ""
+}
+
+func getContent(a *atomEntry) string {
+ r := atomContentToString(a.Content)
+ if r != "" {
+ return r
+ }
+
+ r = atomContentToString(a.Summary)
+ if r != "" {
+ return r
}
if a.MediaGroup.Description != "" {
@@ -168,13 +183,7 @@ func getContent(a *atomEntry) string {
}
func getTitle(a *atomEntry) string {
- title := ""
- if a.Title.Type == "xhtml" {
- title = a.Title.XML
- } else {
- title = a.Title.Data
- }
-
+ title := atomContentToString(a.Title)
return strings.TrimSpace(sanitizer.StripTags(title))
}
diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go
index 82a569a..7339bec 100644
--- a/reader/atom/parser_test.go
+++ b/reader/atom/parser_test.go
@@ -282,6 +282,84 @@ func TestParseEntryTitleWithXHTML(t *testing.T) {
}
}
+func TestParseEntrySummaryWithXHTML(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="xhtml"><code>Test</code> Test</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary type="xhtml"><p>Some text.</p></summary>
+ </entry>
+
+ </feed>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ if feed.Entries[0].Content != "<p>Some text.</p>" {
+ t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+ }
+}
+
+func TestParseEntrySummaryWithHTML(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary type="html"><![CDATA[<p>Some text.</p>]]></summary>
+ </entry>
+
+ </feed>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ if feed.Entries[0].Content != "<p>Some text.</p>" {
+ t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+ }
+}
+
+func TestParseEntrySummaryWithPlainText(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+
+ <entry>
+ <title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary type="text"><![CDATA[<Some text.>]]></summary>
+ </entry>
+
+ </feed>`
+
+ feed, err := Parse(bytes.NewBufferString(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ if feed.Entries[0].Content != "&lt;Some text.&gt;" {
+ t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
+ }
+}
+
func TestParseEntryWithAuthorName(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">