diff options
author | Frédéric Guillot <fred@miniflux.net> | 2017-12-12 19:19:36 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2017-12-12 19:19:36 -0800 |
commit | ef097f02fe76572d2b1887c28da3f2bd83a993a0 (patch) | |
tree | 285dd219fab64119728e03b421ab230392ac5e3e /reader/processor | |
parent | 33445e5b681bbdffaf0925ed020ecdcc49687f15 (diff) |
Add the possibility to enable crawler for feeds
Diffstat (limited to 'reader/processor')
-rw-r--r-- | reader/processor/processor.go | 20 |
1 file changed, 19 insertions, 1 deletion
```diff
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
index 06dad43..7cc5cb1 100644
--- a/reader/processor/processor.go
+++ b/reader/processor/processor.go
@@ -5,9 +5,12 @@
 package processor
 
 import (
+	"log"
+
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/rewrite"
 	"github.com/miniflux/miniflux2/reader/sanitizer"
+	"github.com/miniflux/miniflux2/reader/scraper"
 )
 
 // FeedProcessor handles the processing of feed contents.
@@ -15,6 +18,12 @@ type FeedProcessor struct {
 	feed         *model.Feed
 	scraperRules string
 	rewriteRules string
+	crawler      bool
+}
+
+// WithCrawler enables the crawler.
+func (f *FeedProcessor) WithCrawler(value bool) {
+	f.crawler = value
 }
 
 // WithScraperRules adds scraper rules to the processing.
@@ -30,6 +39,15 @@ func (f *FeedProcessor) WithRewriteRules(rules string) {
 // Process applies rewrite and scraper rules.
 func (f *FeedProcessor) Process() {
 	for _, entry := range f.feed.Entries {
+		if f.crawler {
+			content, err := scraper.Fetch(entry.URL, f.scraperRules)
+			if err != nil {
+				log.Println("[FeedProcessor]", err)
+			} else {
+				entry.Content = content
+			}
+		}
+
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
 	}
@@ -37,5 +55,5 @@ func (f *FeedProcessor) Process() {
 
 // NewFeedProcessor returns a new FeedProcessor.
 func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
-	return &FeedProcessor{feed: feed}
+	return &FeedProcessor{feed: feed, crawler: false}
 }
```