about summary refs log tree commit diff homepage
path: root/reader/parser/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'reader/parser/parser.go')
-rw-r--r-- reader/parser/parser.go 58
1 files changed, 58 insertions, 0 deletions
diff --git a/reader/parser/parser.go b/reader/parser/parser.go
new file mode 100644
index 0000000..30fc603
--- /dev/null
+++ b/reader/parser/parser.go
@@ -0,0 +1,58 @@
+// Copyright 2018 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package parser // import "miniflux.app/reader/parser"
+
+import (
+ "strings"
+
+ "miniflux.app/errors"
+ "miniflux.app/logger"
+ "miniflux.app/model"
+ "miniflux.app/reader/atom"
+ "miniflux.app/reader/json"
+ "miniflux.app/reader/rdf"
+ "miniflux.app/reader/rss"
+)
+
+// ParseFeed sanitizes the raw feed payload, detects its format, and delegates
+// to the matching parser package (atom, rss, json, or rdf).
+//
+// The input is first passed through stripInvalidXMLCharacters; format
+// detection and parsing both operate on the sanitized text. When the detected
+// format matches none of the known constants, a nil feed and a localized
+// error are returned.
+func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
+	sanitized := stripInvalidXMLCharacters(data)
+	format := DetectFeedFormat(sanitized)
+
+	if format == FormatAtom {
+		return atom.Parse(strings.NewReader(sanitized))
+	}
+	if format == FormatRSS {
+		return rss.Parse(strings.NewReader(sanitized))
+	}
+	if format == FormatJSON {
+		return json.Parse(strings.NewReader(sanitized))
+	}
+	if format == FormatRDF {
+		return rdf.Parse(strings.NewReader(sanitized))
+	}
+
+	return nil, errors.NewLocalizedError("Unsupported feed format")
+}
+
+// stripInvalidXMLCharacters returns a copy of input with every rune rejected
+// by isInCharacterRange removed. Each removed rune is reported through
+// logger.Debug.
+func stripInvalidXMLCharacters(input string) string {
+	// strings.Map drops a rune from the output when the mapping yields a
+	// negative value.
+	keepIfValid := func(r rune) rune {
+		if !isInCharacterRange(r) {
+			logger.Debug("Strip invalid XML characters: %U", r)
+			return -1
+		}
+		return r
+	}
+
+	return strings.Map(keepIfValid, input)
+}
+
+// isInCharacterRange reports whether the given rune is in the XML Character
+// Range, per the Char production of http://www.xml.com/axml/testaxml.htm,
+// Section 2.2 Characters:
+//
+//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+func isInCharacterRange(r rune) (inrange bool) {
+	return r == 0x09 ||
+		r == 0x0A ||
+		r == 0x0D ||
+		// The first range stops at 0xD7FF, just below the surrogate block
+		// (0xD800-0xDFFF), which the Char production excludes.
+		r >= 0x20 && r <= 0xD7FF ||
+		r >= 0xE000 && r <= 0xFFFD ||
+		r >= 0x10000 && r <= 0x10FFFF
+}