aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/subscription/finder.go
blob: 239baf7eec68a9686d3679dc762767d982a0b3c4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package subscription

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"time"

	"github.com/miniflux/miniflux2/errors"
	"github.com/miniflux/miniflux2/helper"
	"github.com/miniflux/miniflux2/http"
	"github.com/miniflux/miniflux2/reader/feed"
	"github.com/miniflux/miniflux2/url"

	"github.com/PuerkitoBio/goquery"
)

// Message templates passed to errors.NewLocalizedError in this file. Each one
// takes the underlying error as its single %v argument: errConnectionFailure
// is used when fetching the website fails, errUnreadableDoc when the fetched
// page cannot be turned into a goquery document.
var (
	errConnectionFailure = "Unable to open this link: %v"
	errUnreadableDoc     = "Unable to analyze this page: %v"
)

// FindSubscriptions downloads the given URL and tries to find one or more
// subscriptions in the response.
//
// If the response body is itself a feed (feed.DetectFeedFormat reports a
// format other than feed.FormatUnknown), a single subscription is returned
// whose title and URL are both the effective URL of the response. Otherwise
// the body is handed to parseDocument, which looks for feed links declared in
// the document.
//
// A localized error built from errConnectionFailure is returned when the
// request fails or when the response body cannot be read completely.
func FindSubscriptions(websiteURL string) (Subscriptions, error) {
	// NOTE(review): presumably reports how long the lookup took — confirm in helper.
	defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))

	client := http.NewClient(websiteURL)
	response, err := client.Get()
	if err != nil {
		return nil, errors.NewLocalizedError(errConnectionFailure, err)
	}

	// Buffer the whole body up front: it may have to be read twice, once for
	// feed detection and once for document parsing. A failed or truncated
	// read is reported instead of analyzing partial content.
	var buffer bytes.Buffer
	if _, err := io.Copy(&buffer, response.Body); err != nil {
		return nil, errors.NewLocalizedError(errConnectionFailure, err)
	}
	body := buffer.Bytes()

	if format := feed.DetectFeedFormat(bytes.NewReader(body)); format != feed.FormatUnknown {
		return Subscriptions{&Subscription{
			Title: response.EffectiveURL,
			URL:   response.EffectiveURL,
			Type:  format,
		}}, nil
	}

	// Not a feed: parse it as a document, using a fresh reader positioned at
	// the start of the buffered body.
	return parseDocument(response.EffectiveURL, bytes.NewReader(body))
}

// parseDocument parses data with goquery and collects the feeds advertised
// through <link> elements whose type attribute is application/rss+xml,
// application/atom+xml or application/json.
//
// websiteURL is the base against which each link's href is resolved via
// url.AbsoluteURL. Links without an href, or whose href cannot be resolved to
// a non-empty URL, are skipped. The subscription title is the link's title
// attribute; it falls back to "Feed" when the attribute is absent and to the
// feed URL when the attribute is present but empty.
//
// Results are grouped by feed type in a fixed order (rss, atom, json). A
// localized error built from errUnreadableDoc is returned when the document
// cannot be parsed.
func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) {
	// An ordered slice instead of a map: map iteration order is randomized,
	// which would make the order of the returned subscriptions vary between
	// calls for the same document.
	queries := []struct {
		selector string
		kind     string
	}{
		{"link[type='application/rss+xml']", "rss"},
		{"link[type='application/atom+xml']", "atom"},
		{"link[type='application/json']", "json"},
	}

	doc, err := goquery.NewDocumentFromReader(data)
	if err != nil {
		return nil, errors.NewLocalizedError(errUnreadableDoc, err)
	}

	var subscriptions Subscriptions
	for _, query := range queries {
		kind := query.kind
		doc.Find(query.selector).Each(func(_ int, s *goquery.Selection) {
			href, exists := s.Attr("href")
			if !exists {
				return
			}

			// A link whose target cannot be resolved is useless to the caller,
			// so it is dropped rather than reported as an error.
			feedURL, err := url.AbsoluteURL(websiteURL, href)
			if err != nil || feedURL == "" {
				return
			}

			title, exists := s.Attr("title")
			switch {
			case !exists:
				title = "Feed"
			case title == "":
				title = feedURL
			}

			subscription := &Subscription{
				Title: title,
				URL:   feedURL,
				Type:  kind,
			}

			log.Println("[FindSubscriptions]", subscription)
			subscriptions = append(subscriptions, subscription)
		})
	}

	return subscriptions, nil
}