From 713b38e34c6d4a232af871659793bb9674e020f7 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Fri, 19 Jan 2018 22:42:55 -0800 Subject: Handle more encoding edge cases - Feeds with charset specified only in Content-Type header and not in XML document - Feeds with charset specified in both places - Feeds with charset specified only in XML document and not in HTTP header --- reader/feed/parser.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'reader/feed') diff --git a/reader/feed/parser.go b/reader/feed/parser.go index e612f39..a2b39ed 100644 --- a/reader/feed/parser.go +++ b/reader/feed/parser.go @@ -14,12 +14,11 @@ import ( "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/atom" + "github.com/miniflux/miniflux/reader/encoding" "github.com/miniflux/miniflux/reader/json" "github.com/miniflux/miniflux/reader/rdf" "github.com/miniflux/miniflux/reader/rss" "github.com/miniflux/miniflux/timer" - - "golang.org/x/net/html/charset" ) // List of feed formats. @@ -32,14 +31,14 @@ const ( ) // DetectFeedFormat detect feed format from input data. -func DetectFeedFormat(data io.Reader) string { +func DetectFeedFormat(r io.Reader) string { defer timer.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]") var buffer bytes.Buffer - tee := io.TeeReader(data, &buffer) + tee := io.TeeReader(r, &buffer) decoder := xml.NewDecoder(tee) - decoder.CharsetReader = charset.NewReaderLabel + decoder.CharsetReader = encoding.CharsetReader for { token, _ := decoder.Token() @@ -66,11 +65,11 @@ func DetectFeedFormat(data io.Reader) string { return FormatUnknown } -func parseFeed(data io.Reader) (*model.Feed, error) { +func parseFeed(r io.Reader) (*model.Feed, error) { defer timer.ExecutionTime(time.Now(), "[Feed:ParseFeed]") var buffer bytes.Buffer - io.Copy(&buffer, data) + io.Copy(&buffer, r) reader := bytes.NewReader(buffer.Bytes()) format := DetectFeedFormat(reader) -- cgit v1.2.3