author     Frédéric Guillot <fred@miniflux.net>   2017-12-12 19:19:36 -0800
committer  Frédéric Guillot <fred@miniflux.net>   2017-12-12 19:19:36 -0800
commit     ef097f02fe76572d2b1887c28da3f2bd83a993a0
tree       285dd219fab64119728e03b421ab230392ac5e3e /reader
parent     33445e5b681bbdffaf0925ed020ecdcc49687f15
Add the possibility to enable crawler for feeds
Diffstat (limited to 'reader')
 reader/feed/handler.go        |  5 ++++-
 reader/processor/processor.go | 20 +++++++++++++++++++-
 reader/scraper/scraper.go     |  5 ++---
 3 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index 7a98613..6b94627 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -33,7 +33,7 @@ type Handler struct {
 }
 
 // CreateFeed fetch, parse and store a new feed.
-func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, error) {
+func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool) (*model.Feed, error) {
 	defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
 
 	if !h.store.CategoryExists(userID, categoryID) {
@@ -65,6 +65,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed,
 	}
 
 	feedProcessor := processor.NewFeedProcessor(subscription)
+	feedProcessor.WithCrawler(crawler)
 	feedProcessor.Process()
 
 	subscription.Category = &model.Category{ID: categoryID}
@@ -72,6 +73,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed,
 	subscription.LastModifiedHeader = response.LastModified
 	subscription.FeedURL = response.EffectiveURL
 	subscription.UserID = userID
+	subscription.Crawler = crawler
 
 	err = h.store.CreateFeed(subscription)
 	if err != nil {
@@ -143,6 +145,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	feedProcessor := processor.NewFeedProcessor(subscription)
 	feedProcessor.WithScraperRules(originalFeed.ScraperRules)
 	feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
+	feedProcessor.WithCrawler(originalFeed.Crawler)
 	feedProcessor.Process()
 
 	originalFeed.EtagHeader = response.ETag
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
index 06dad43..7cc5cb1 100644
--- a/reader/processor/processor.go
+++ b/reader/processor/processor.go
@@ -5,9 +5,12 @@
 package processor
 
 import (
+	"log"
+
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/rewrite"
 	"github.com/miniflux/miniflux2/reader/sanitizer"
+	"github.com/miniflux/miniflux2/reader/scraper"
 )
 
 // FeedProcessor handles the processing of feed contents.
@@ -15,6 +18,12 @@ type FeedProcessor struct {
 	feed         *model.Feed
 	scraperRules string
 	rewriteRules string
+	crawler      bool
+}
+
+// WithCrawler enables the crawler.
+func (f *FeedProcessor) WithCrawler(value bool) {
+	f.crawler = value
 }
 
 // WithScraperRules adds scraper rules to the processing.
@@ -30,6 +39,15 @@ func (f *FeedProcessor) WithRewriteRules(rules string) {
 // Process applies rewrite and scraper rules.
 func (f *FeedProcessor) Process() {
 	for _, entry := range f.feed.Entries {
+		if f.crawler {
+			content, err := scraper.Fetch(entry.URL, f.scraperRules)
+			if err != nil {
+				log.Println("[FeedProcessor]", err)
+			} else {
+				entry.Content = content
+			}
+		}
+
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
 	}
@@ -37,5 +55,5 @@ func (f *FeedProcessor) Process() {
 
 // NewFeedProcessor returns a new FeedProcessor.
 func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
-	return &FeedProcessor{feed: feed}
+	return &FeedProcessor{feed: feed, crawler: false}
 }
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
index b79a088..e799ad0 100644
--- a/reader/scraper/scraper.go
+++ b/reader/scraper/scraper.go
@@ -13,7 +13,6 @@ import (
 	"github.com/PuerkitoBio/goquery"
 	"github.com/miniflux/miniflux2/http"
 	"github.com/miniflux/miniflux2/reader/readability"
-	"github.com/miniflux/miniflux2/reader/sanitizer"
 	"github.com/miniflux/miniflux2/url"
 )
 
@@ -34,11 +33,11 @@ func Fetch(websiteURL, rules string) (string, error) {
 		return "", err
 	}
 
-	var content string
 	if rules == "" {
 		rules = getPredefinedScraperRules(websiteURL)
 	}
 
+	var content string
 	if rules != "" {
 		log.Printf(`[Scraper] Using rules "%s" for "%s"`, rules, websiteURL)
 		content, err = scrapContent(page, rules)
@@ -51,7 +50,7 @@ func Fetch(websiteURL, rules string) (string, error) {
 		return "", err
 	}
 
-	return sanitizer.Sanitize(websiteURL, content), nil
+	return content, nil
 }
 
 func scrapContent(page io.Reader, rules string) (string, error) {
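
Taken together, the change threads a single crawler flag from the subscription layer down to the scraper: CreateFeed accepts it, FeedProcessor stores it via WithCrawler, and Process downloads each entry's full content with scraper.Fetch when the flag is set. Below is a minimal usage sketch based only on the signatures visible in this diff; the helper name and its arguments are illustrative, and the feed.Handler is assumed to be constructed elsewhere in the application.

// Hypothetical example, not part of this commit.
package example

import (
	"log"

	"github.com/miniflux/miniflux2/reader/feed"
)

// subscribeWithCrawler shows the new CreateFeed signature: the trailing
// bool is the per-feed crawler toggle introduced by this commit.
func subscribeWithCrawler(h *feed.Handler, userID, categoryID int64, url string) error {
	// crawler=true: Process() will replace each entry's feed-provided
	// excerpt with the scraped page content before sanitizing it.
	subscription, err := h.CreateFeed(userID, categoryID, url, true)
	if err != nil {
		return err
	}

	log.Printf("subscribed to %s (crawler=%v)", subscription.FeedURL, subscription.Crawler)
	return nil
}

Note that RefreshFeed needs no new parameter: it reads the persisted originalFeed.Crawler value, so the choice made at creation time is reapplied on every refresh.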
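
The scraper side of the diff also changes a contract: scraper.Fetch no longer sanitizes its result, which avoids sanitizing crawled content twice now that FeedProcessor.Process sanitizes everything in one place. Any other caller of the scraper must therefore sanitize explicitly. A sketch of that pattern, assuming only the two signatures shown above (the helper name is hypothetical):

// Hypothetical example, not part of this commit.
package example

import (
	"github.com/miniflux/miniflux2/reader/sanitizer"
	"github.com/miniflux/miniflux2/reader/scraper"
)

// fetchClean fetches a page with the scraper and sanitizes the raw
// markup itself, since Fetch now returns unsanitized content.
func fetchClean(websiteURL string) (string, error) {
	// An empty rules string makes Fetch fall back to predefined scraper
	// rules for the site, then to readability extraction.
	content, err := scraper.Fetch(websiteURL, "")
	if err != nil {
		return "", err
	}

	return sanitizer.Sanitize(websiteURL, content), nil
}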