aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/feed/parser.go
blob: e612f3986b567a8bdbf06eb57abcea9c1a1f41af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package feed

import (
	"bytes"
	"encoding/xml"
	"errors"
	"io"
	"strings"
	"time"

	"github.com/miniflux/miniflux/model"
	"github.com/miniflux/miniflux/reader/atom"
	"github.com/miniflux/miniflux/reader/json"
	"github.com/miniflux/miniflux/reader/rdf"
	"github.com/miniflux/miniflux/reader/rss"
	"github.com/miniflux/miniflux/timer"

	"golang.org/x/net/html/charset"
)

// List of feed formats.
//
// These are the values DetectFeedFormat can return; parseFeed switches on
// them to pick the format-specific parser.
const (
	FormatRDF     = "rdf"     // Reported when a start element named "RDF" is found.
	FormatRSS     = "rss"     // Reported when a start element named "rss" is found.
	FormatAtom    = "atom"    // Reported when a start element named "feed" is found.
	FormatJSON    = "json"    // Reported for input whose first non-space character is "{".
	FormatUnknown = "unknown" // Reported when none of the formats above is recognized.
)

// DetectFeedFormat detects the feed format from input data.
//
// The input is tokenized as XML and the local name of each start element is
// inspected: "rss" yields FormatRSS, "feed" yields FormatAtom and "RDF" yields
// FormatRDF. Input whose first non-space character is "{" yields FormatJSON.
// Anything else yields FormatUnknown.
//
// The reader is consumed (partially or fully); callers that need the content
// afterwards must keep their own copy or rewind the reader.
func DetectFeedFormat(data io.Reader) string {
	defer timer.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")

	// Mirror every byte the XML decoder consumes so the raw input can still
	// be inspected if tokenizing stops without a verdict.
	var buffer bytes.Buffer
	tee := io.TeeReader(data, &buffer)

	decoder := xml.NewDecoder(tee)
	decoder.CharsetReader = charset.NewReaderLabel

	firstToken := true
	for {
		// The error is deliberately ignored: a nil token covers both EOF and
		// malformed input, and in either case there is nothing left to scan.
		token, _ := decoder.Token()
		if token == nil {
			break
		}

		switch t := token.(type) {
		case xml.CharData:
			// Text starting with "{" before any markup cannot be an XML
			// feed. Deciding here prevents markup embedded in JSON string
			// values (e.g. an HTML snippet containing a <feed> element) from
			// being mistaken for the document's root element further down.
			if firstToken && strings.HasPrefix(strings.TrimSpace(string(t)), "{") {
				return FormatJSON
			}
		case xml.StartElement:
			switch t.Name.Local {
			case "rss":
				return FormatRSS
			case "feed":
				return FormatAtom
			case "RDF":
				return FormatRDF
			}
		}

		firstToken = false
	}

	// The tokenizer may fail before yielding any token (for instance on a
	// bare "&" inside a JSON string), so fall back to the raw bytes read.
	if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
		return FormatJSON
	}

	return FormatUnknown
}

// parseFeed reads the whole input into memory, detects its feed format with
// DetectFeedFormat and delegates to the parser matching that format.
//
// It returns the underlying error when the input cannot be read completely,
// the selected parser's result when a format is recognized, and an
// "Unsupported feed format" error otherwise.
func parseFeed(data io.Reader) (*model.Feed, error) {
	defer timer.ExecutionTime(time.Now(), "[Feed:ParseFeed]")

	// The content is consumed twice (detection, then parsing), so it has to
	// be buffered. A failed read must not be parsed as if it were complete.
	var buffer bytes.Buffer
	if _, err := io.Copy(&buffer, data); err != nil {
		return nil, err
	}

	reader := bytes.NewReader(buffer.Bytes())
	format := DetectFeedFormat(reader)

	// Detection advanced the reader; rewind so the parser sees the document
	// from its first byte.
	if _, err := reader.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	switch format {
	case FormatAtom:
		return atom.Parse(reader)
	case FormatRSS:
		return rss.Parse(reader)
	case FormatJSON:
		return json.Parse(reader)
	case FormatRDF:
		return rdf.Parse(reader)
	default:
		return nil, errors.New("Unsupported feed format")
	}
}