diff options
Diffstat (limited to 'reader/parser/parser.go')
-rw-r--r-- | reader/parser/parser.go | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/reader/parser/parser.go b/reader/parser/parser.go new file mode 100644 index 0000000..30fc603 --- /dev/null +++ b/reader/parser/parser.go @@ -0,0 +1,58 @@ +// Copyright 2018 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package parser // import "miniflux.app/reader/parser" + +import ( + "strings" + + "miniflux.app/errors" + "miniflux.app/logger" + "miniflux.app/model" + "miniflux.app/reader/atom" + "miniflux.app/reader/json" + "miniflux.app/reader/rdf" + "miniflux.app/reader/rss" +) + +// ParseFeed analyzes the input data and returns a normalized feed object. +func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) { + data = stripInvalidXMLCharacters(data) + + switch DetectFeedFormat(data) { + case FormatAtom: + return atom.Parse(strings.NewReader(data)) + case FormatRSS: + return rss.Parse(strings.NewReader(data)) + case FormatJSON: + return json.Parse(strings.NewReader(data)) + case FormatRDF: + return rdf.Parse(strings.NewReader(data)) + default: + return nil, errors.NewLocalizedError("Unsupported feed format") + } +} + +func stripInvalidXMLCharacters(input string) string { + return strings.Map(func(r rune) rune { + if isInCharacterRange(r) { + return r + } + + logger.Debug("Strip invalid XML characters: %U", r) + return -1 + }, input) +} + +// Decide whether the given rune is in the XML Character Range, per +// the Char production of http://www.xml.com/axml/testaxml.htm, +// Section 2.2 Characters. +func isInCharacterRange(r rune) (inrange bool) { + return r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xDF77 || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF +} |