From 0cdcec10caffab74ff742e7bc8379a956357d73f Mon Sep 17 00:00:00 2001 From: Peter De Wachter Date: Tue, 1 Jan 2019 22:01:19 +0100 Subject: More robust Atom text handling Miniflux couldn't deal with XHTML Summary elements. - Make Summary an 'atomContent' field - Define an atomContentToString function rather than inlining it three times - Also properly escape special characters in plain text fields. --- reader/atom/atom.go | 39 ++++++++++++++--------- reader/atom/parser_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 15 deletions(-) (limited to 'reader') diff --git a/reader/atom/atom.go b/reader/atom/atom.go index 7fbf040..28d4e15 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -6,6 +6,7 @@ package atom // import "miniflux.app/reader/atom" import ( "encoding/xml" + "html" "strconv" "strings" "time" @@ -33,7 +34,7 @@ type atomEntry struct { Published string `xml:"published"` Updated string `xml:"updated"` Links []atomLink `xml:"link"` - Summary string `xml:"summary"` + Summary atomContent `xml:"summary"` Content atomContent `xml:"content"` MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"` Author atomAuthor `xml:"author"` @@ -147,17 +148,31 @@ func getDate(a *atomEntry) time.Time { return time.Now() } -func getContent(a *atomEntry) string { - if a.Content.Type == "html" || a.Content.Type == "text" { - return a.Content.Data +func atomContentToString(c atomContent) string { + if c.Type == "xhtml" { + return c.XML } - if a.Content.Type == "xhtml" { - return a.Content.XML + if c.Type == "html" { + return c.Data } - if a.Summary != "" { - return a.Summary + if c.Type == "text" || c.Type == "" { + return html.EscapeString(c.Data) + } + + return "" +} + +func getContent(a *atomEntry) string { + r := atomContentToString(a.Content) + if r != "" { + return r + } + + r = atomContentToString(a.Summary) + if r != "" { + return r } if a.MediaGroup.Description != "" { @@ -168,13 +183,7 @@ func 
getContent(a *atomEntry) string { } func getTitle(a *atomEntry) string { - title := "" - if a.Title.Type == "xhtml" { - title = a.Title.XML - } else { - title = a.Title.Data - } - + title := atomContentToString(a.Title) return strings.TrimSpace(sanitizer.StripTags(title)) } diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index 82a569a..7339bec 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -282,6 +282,84 @@ func TestParseEntryTitleWithXHTML(t *testing.T) { } } +func TestParseEntrySummaryWithXHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z +

Some text.

+
+ +
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text.

]]>
+
+ +
` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Content != "

Some text.

" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + +func TestParseEntrySummaryWithPlainText(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + ]]> + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Content != "<Some text.>" { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + } +} + func TestParseEntryWithAuthorName(t *testing.T) { data := ` -- cgit v1.2.3