aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader
diff options
context:
space:
mode:
author: Frédéric Guillot <fred@miniflux.net> 2018-02-07 20:57:56 -0800
committer: Frédéric Guillot <fred@miniflux.net> 2018-02-07 20:57:56 -0800
commit: 7b0bfd930839e6ebe4b64f37cbdc6efbf7dbd090 (patch)
tree: ccc7b93660572558c3bb5374b12faff8f9dd1e8f /reader
parent: c6fd9eb9b1a26c7f029cfd97fb1637d2ab867173 (diff)
Strip invalid XML characters to avoid parsing errors
Diffstat (limited to 'reader')
-rw-r--r-- reader/feed/parser.go | 27
1 file changed, 26 insertions, 1 deletion
diff --git a/reader/feed/parser.go b/reader/feed/parser.go
index c04836a..70c81e3 100644
--- a/reader/feed/parser.go
+++ b/reader/feed/parser.go
@@ -12,6 +12,7 @@ import (
"strings"
"time"
+ "github.com/miniflux/miniflux/logger"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/atom"
"github.com/miniflux/miniflux/reader/encoding"
@@ -74,7 +75,8 @@ func parseFeed(r io.Reader) (*model.Feed, error) {
return nil, errors.New("This feed is empty")
}
- reader := bytes.NewReader(buffer.Bytes())
+ str := stripInvalidXMLCharacters(buffer.String())
+ reader := strings.NewReader(str)
format := DetectFeedFormat(reader)
reader.Seek(0, io.SeekStart)
@@ -91,3 +93,26 @@ func parseFeed(r io.Reader) (*model.Feed, error) {
return nil, errors.New("Unsupported feed format")
}
}
+
+// stripInvalidXMLCharacters returns a copy of input with every rune that
+// isInCharacterRange rejects removed. Each removed rune is reported through
+// logger.Debug.
+func stripInvalidXMLCharacters(input string) string {
+	keepValid := func(r rune) rune {
+		if !isInCharacterRange(r) {
+			logger.Debug("Strip invalid XML characters: %U", r)
+			// A negative result tells strings.Map to drop the rune.
+			return -1
+		}
+
+		return r
+	}
+
+	return strings.Map(keepValid, input)
+}
+
+// isInCharacterRange reports whether r is in the XML Character Range, per
+// the Char production of http://www.xml.com/axml/testaxml.htm,
+// Section 2.2 Characters:
+//
+//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+//
+// The first range ends at 0xD7FF, immediately below the UTF-16 surrogate
+// block (0xD800-0xDFFF), so surrogate code points are rejected along with
+// the other control characters and the non-characters 0xFFFE and 0xFFFF.
+func isInCharacterRange(r rune) (inrange bool) {
+	return r == 0x09 ||
+		r == 0x0A ||
+		r == 0x0D ||
+		r >= 0x20 && r <= 0xD7FF ||
+		r >= 0xE000 && r <= 0xFFFD ||
+		r >= 0x10000 && r <= 0x10FFFF
+}