diff options
author | 2019-01-01 22:01:19 +0100 | |
---|---|---|
committer | 2019-01-07 17:55:02 -0800 | |
commit | 0cdcec10caffab74ff742e7bc8379a956357d73f (patch) | |
tree | 11401b13928c383f32f1525d8cdd7f6c09b1a2b0 | |
parent | 15505ee4a2bd4963d0cbc9d1820e9be641b221ca (diff) |
More robust Atom text handling
Miniflux couldn't deal with XHTML Summary elements.
- Make Summary an 'atomContent' field
- Define an atomContentToString function rather than inlining it three times
- Also properly escape special characters in plain text fields.
-rw-r--r-- | reader/atom/atom.go | 39 | ||||
-rw-r--r-- | reader/atom/parser_test.go | 78 |
2 files changed, 102 insertions, 15 deletions
diff --git a/reader/atom/atom.go b/reader/atom/atom.go index 7fbf040..28d4e15 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -6,6 +6,7 @@ package atom // import "miniflux.app/reader/atom" import ( "encoding/xml" + "html" "strconv" "strings" "time" @@ -33,7 +34,7 @@ type atomEntry struct { Published string `xml:"published"` Updated string `xml:"updated"` Links []atomLink `xml:"link"` - Summary string `xml:"summary"` + Summary atomContent `xml:"summary"` Content atomContent `xml:"content"` MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"` Author atomAuthor `xml:"author"` @@ -147,17 +148,31 @@ func getDate(a *atomEntry) time.Time { return time.Now() } -func getContent(a *atomEntry) string { - if a.Content.Type == "html" || a.Content.Type == "text" { - return a.Content.Data +func atomContentToString(c atomContent) string { + if c.Type == "xhtml" { + return c.XML } - if a.Content.Type == "xhtml" { - return a.Content.XML + if c.Type == "html" { + return c.Data } - if a.Summary != "" { - return a.Summary + if c.Type == "text" || c.Type == "" { + return html.EscapeString(c.Data) + } + + return "" +} + +func getContent(a *atomEntry) string { + r := atomContentToString(a.Content) + if r != "" { + return r + } + + r = atomContentToString(a.Summary) + if r != "" { + return r } if a.MediaGroup.Description != "" { @@ -168,13 +183,7 @@ func getContent(a *atomEntry) string { } func getTitle(a *atomEntry) string { - title := "" - if a.Title.Type == "xhtml" { - title = a.Title.XML - } else { - title = a.Title.Data - } - + title := atomContentToString(a.Title) return strings.TrimSpace(sanitizer.StripTags(title)) } diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index 82a569a..7339bec 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -282,6 +282,84 @@ func TestParseEntryTitleWithXHTML(t *testing.T) { } } +func TestParseEntrySummaryWithXHTML(t *testing.T) { + data := `<?xml version="1.0" 
encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="xhtml"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="xhtml"><p>Some text.</p></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="html"><![CDATA[<p>Some text.</p>]]></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Content != "<p>Some text.</p>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithPlainText(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <title>Example Feed</title> + <link href="http://example.org/"/> + + <entry> + <title type="html"><code>Test</code> Test</title> + <link href="http://example.org/2003/12/13/atom03"/> + <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> + <updated>2003-12-13T18:30:02Z</updated> + <summary type="text"><![CDATA[<Some text.>]]></summary> + </entry> + + </feed>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + 
} + + if feed.Entries[0].Content != "<Some text.>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + func TestParseEntryWithAuthorName(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> |