diff options
author | Frédéric Guillot <fred@miniflux.net> | 2017-11-19 21:10:04 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2017-11-19 22:01:46 -0800 |
commit | 8ffb773f43c8dc54801ca1d111854e7e881c93c9 (patch) | |
tree | 38133a2fc612597a75fed1d13e5b4042f58a2b7e /reader/subscription |
First commit
Diffstat (limited to 'reader/subscription')
-rw-r--r-- | reader/subscription/finder.go | 96 | ||||
-rw-r--r-- | reader/subscription/subscription.go | 21 |
2 files changed, 117 insertions, 0 deletions
diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go new file mode 100644 index 0000000..7314644 --- /dev/null +++ b/reader/subscription/finder.go @@ -0,0 +1,96 @@ +// Copyright 2017 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package subscription + +import ( + "bytes" + "fmt" + "github.com/miniflux/miniflux2/errors" + "github.com/miniflux/miniflux2/helper" + "github.com/miniflux/miniflux2/reader/feed" + "github.com/miniflux/miniflux2/reader/http" + "github.com/miniflux/miniflux2/reader/url" + "io" + "log" + "time" + + "github.com/PuerkitoBio/goquery" +) + +var ( + errConnectionFailure = "Unable to open this link: %v" + errUnreadableDoc = "Unable to analyze this page: %v" +) + +// FindSubscriptions downloads and try to find one or more subscriptions from an URL. +func FindSubscriptions(websiteURL string) (Subscriptions, error) { + defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL)) + + client := http.NewHttpClient(websiteURL) + response, err := client.Get() + if err != nil { + return nil, errors.NewLocalizedError(errConnectionFailure, err) + } + + var buffer bytes.Buffer + io.Copy(&buffer, response.Body) + reader := bytes.NewReader(buffer.Bytes()) + + if format := feed.DetectFeedFormat(reader); format != feed.FormatUnknown { + var subscriptions Subscriptions + subscriptions = append(subscriptions, &Subscription{ + Title: response.EffectiveURL, + URL: response.EffectiveURL, + Type: format, + }) + + return subscriptions, nil + } + + reader.Seek(0, io.SeekStart) + return parseDocument(response.EffectiveURL, bytes.NewReader(buffer.Bytes())) +} + +func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { + var subscriptions Subscriptions + queries := map[string]string{ + "link[type='application/rss+xml']": "rss", + "link[type='application/atom+xml']": "atom", + "link[type='application/json']": "json", + } + + doc, err := goquery.NewDocumentFromReader(data) + if err != nil { + return nil, errors.NewLocalizedError(errUnreadableDoc, err) + } + + for query, kind := range queries { + doc.Find(query).Each(func(i int, s *goquery.Selection) { + subscription := new(Subscription) + subscription.Type = kind + + if title, exists := s.Attr("title"); exists { + subscription.Title = title + } else { + subscription.Title = "Feed" + } + + if feedURL, exists := s.Attr("href"); exists { + subscription.URL, _ = url.GetAbsoluteURL(websiteURL, feedURL) + } + + if subscription.Title == "" { + subscription.Title = subscription.URL + } + + if subscription.URL != "" { + log.Println("[FindSubscriptions]", subscription) + subscriptions = append(subscriptions, subscription) + } + }) + } + + return subscriptions, nil +} diff --git a/reader/subscription/subscription.go b/reader/subscription/subscription.go new file mode 100644 index 0000000..f619f73 --- /dev/null +++ b/reader/subscription/subscription.go @@ -0,0 +1,21 @@ +// Copyright 2017 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package subscription + +import "fmt" + +// Subscription represents a feed subscription. +type Subscription struct { + Title string `json:"title"` + URL string `json:"url"` + Type string `json:"type"` +} + +func (s Subscription) String() string { + return fmt.Sprintf(`Title="%s", URL="%s", Type="%s"`, s.Title, s.URL, s.Type) +} + +// Subscriptions represents a list of subscription. +type Subscriptions []*Subscription |