diff options
Diffstat (limited to 'reader/xml/decoder.go')
-rw-r--r-- | reader/xml/decoder.go | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/reader/xml/decoder.go b/reader/xml/decoder.go new file mode 100644 index 0000000..d01f74e --- /dev/null +++ b/reader/xml/decoder.go @@ -0,0 +1,50 @@ +// Copyright 2019 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package xml // import "miniflux.app/reader/xml" + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "io/ioutil" + + "miniflux.app/reader/encoding" +) + +// NewDecoder returns a XML decoder that filters illegal characters. +func NewDecoder(data io.Reader) *xml.Decoder { + decoder := xml.NewDecoder(data) + decoder.Entity = xml.HTMLEntity + decoder.Strict = false + decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + utf8Reader, err := encoding.CharsetReader(charset, input) + if err != nil { + return nil, err + } + rawData, err := ioutil.ReadAll(utf8Reader) + if err != nil { + return nil, fmt.Errorf("Unable to read data: %q", err) + } + filteredBytes := bytes.Map(filterValidXMLChar, rawData) + return bytes.NewReader(filteredBytes), nil + } + + return decoder +} + +// This function is copied from encoding/xml package, +// and is used to check if all the characters are legal. +func filterValidXMLChar(r rune) rune { + if r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xD7FF || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF { + return r + } + return -1 +} |