aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/parser/parser.go
blob: 30fc6034c27e44695d2457e96e7c72be33749883 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Copyright 2018 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package parser // import "miniflux.app/reader/parser"

import (
	"strings"

	"miniflux.app/errors"
	"miniflux.app/logger"
	"miniflux.app/model"
	"miniflux.app/reader/atom"
	"miniflux.app/reader/json"
	"miniflux.app/reader/rdf"
	"miniflux.app/reader/rss"
)

// ParseFeed sanitizes the raw feed document, detects which syndication
// format it uses, and hands it to the matching parser.
//
// It returns the feed produced by the selected parser, or a localized
// error when the detected format is not one of Atom, RSS, JSON or RDF.
func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
	// Drop characters that are not legal in XML before detection and parsing.
	cleaned := stripInvalidXMLCharacters(data)
	format := DetectFeedFormat(cleaned)

	if format == FormatAtom {
		return atom.Parse(strings.NewReader(cleaned))
	}

	if format == FormatRSS {
		return rss.Parse(strings.NewReader(cleaned))
	}

	if format == FormatJSON {
		return json.Parse(strings.NewReader(cleaned))
	}

	if format == FormatRDF {
		return rdf.Parse(strings.NewReader(cleaned))
	}

	return nil, errors.NewLocalizedError("Unsupported feed format")
}

// stripInvalidXMLCharacters returns a copy of input with every rune rejected
// by isInCharacterRange removed. Each removed rune is reported through the
// debug logger.
func stripInvalidXMLCharacters(input string) string {
	keepValid := func(char rune) rune {
		if !isInCharacterRange(char) {
			logger.Debug("Strip invalid XML characters: %U", char)
			// A negative result tells strings.Map to drop the rune.
			return -1
		}

		return char
	}

	return strings.Map(keepValid, input)
}

// isInCharacterRange reports whether the given rune is in the XML Character
// Range, per the Char production of http://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters:
//
//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
//
// Code points in the surrogate block (0xD800-0xDFFF) as well as 0xFFFE and
// 0xFFFF are not legal XML characters and yield false.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		// The upper bound is 0xD7FF, the last code point before the surrogates.
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}