From ae1dc1a91eea23be14f952efb130412fe6a7996b Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Mon, 29 Oct 2018 23:00:03 -0700 Subject: Handle more encoding conversion edge cases --- reader/encoding/encoding.go | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'reader/encoding/encoding.go') diff --git a/reader/encoding/encoding.go b/reader/encoding/encoding.go index 497b2d0..7f4abda 100644 --- a/reader/encoding/encoding.go +++ b/reader/encoding/encoding.go @@ -7,6 +7,7 @@ package encoding // import "miniflux.app/reader/encoding" import ( "bytes" "io" + "io/ioutil" "unicode/utf8" "golang.org/x/net/html/charset" @@ -19,20 +20,20 @@ import ( // // Several edge cases could exists: // -// - Feeds with charset specified only in Content-Type header and not in XML document -// - Feeds with charset specified in both places -// - Feeds with charset specified only in XML document and not in HTTP header +// - Feeds with encoding specified only in Content-Type header and not in XML document +// - Feeds with encoding specified in both places +// - Feeds with encoding specified only in XML document and not in HTTP header +// - Feeds with wrong encoding defined and already in UTF-8 func CharsetReader(label string, input io.Reader) (io.Reader, error) { - var buf1, buf2 bytes.Buffer - w := io.MultiWriter(&buf1, &buf2) - io.Copy(w, input) - r := bytes.NewReader(buf2.Bytes()) + buffer, _ := ioutil.ReadAll(input) + r := bytes.NewReader(buffer) - if !utf8.Valid(buf1.Bytes()) { - // Transform document to UTF-8 from the specified XML encoding. - return charset.NewReaderLabel(label, r) + // The document is already UTF-8, do not do anything (avoid double-encoding). + // That means the specified encoding in XML prolog is wrong. + if utf8.Valid(buffer) { + return r, nil } - // The document is already UTF-8, do not do anything (avoid double-encoding) - return r, nil + // Transform document to UTF-8 from the specified encoding in XML prolog. + return charset.NewReaderLabel(label, r) } -- cgit v1.2.3