author     Frédéric Guillot <fred@miniflux.net>  2018-10-14 22:33:19 -0700
committer  Frédéric Guillot <fred@miniflux.net>  2018-10-14 22:33:19 -0700
commit     b8f874a37d5ce57fb139e857b5cbd2276da46714 (patch)
tree       b3754ad08399590d2a2cc2ad780792c534ecc431
parent     234b3710d4d3bc3b5cb5e56feaf74ceabfb9ef41 (diff)
Simplify feed entries filtering
- Rename processor package to filter
- Remove boilerplate code
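A minimal usage sketch (not part of the commit) of the simplified call pattern, assuming the caller already holds a *storage.Storage and a parsed *model.Feed; the package name sketch and the helper refreshSketch are hypothetical:

package sketch

import (
	"miniflux.app/model"
	"miniflux.app/reader/filter"
	"miniflux.app/storage"
)

// refreshSketch shows the call-site change: configuration now comes from the
// feed's own fields (Crawler, ScraperRules, UserAgent, RewriteRules) instead
// of the removed With* setters on FeedProcessor.
func refreshSketch(store *storage.Storage, feed *model.Feed) {
	// Before this commit:
	//   p := processor.NewFeedProcessor(feed.UserID, store, feed)
	//   p.WithScraperRules(feed.ScraperRules)
	//   p.WithUserAgent(feed.UserAgent)
	//   p.WithRewriteRules(feed.RewriteRules)
	//   p.WithCrawler(feed.Crawler)
	//   p.Process()
	//
	// After this commit, one call reads the same settings from the feed itself:
	filter.Apply(store, feed)
}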
-rw-r--r--  reader/feed/handler.go  20
-rw-r--r--  reader/filter/doc.go (renamed from reader/processor/doc.go)  4
-rw-r--r--  reader/filter/filter.go  36
-rw-r--r--  reader/processor/processor.go  71
4 files changed, 45 insertions, 86 deletions
diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index 5c13dd4..d48a80a 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -14,9 +14,9 @@ import (
"miniflux.app/logger"
"miniflux.app/model"
"miniflux.app/reader/browser"
+ "miniflux.app/reader/filter"
"miniflux.app/reader/icon"
"miniflux.app/reader/parser"
- "miniflux.app/reader/processor"
"miniflux.app/storage"
"miniflux.app/timer"
)
@@ -63,9 +63,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
subscription.WithClientResponse(response)
subscription.CheckedNow()
- feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
- feedProcessor.WithCrawler(crawler)
- feedProcessor.Process()
+ filter.Apply(h.store, subscription)
if storeErr := h.store.CreateFeed(subscription); storeErr != nil {
return nil, storeErr
@@ -108,22 +106,18 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
- subscription, parseErr := parser.ParseFeed(response.String())
+ updatedFeed, parseErr := parser.ParseFeed(response.String())
if parseErr != nil {
originalFeed.WithError(parseErr.Localize(printer))
h.store.UpdateFeed(originalFeed)
return parseErr
}
- feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
- feedProcessor.WithScraperRules(originalFeed.ScraperRules)
- feedProcessor.WithUserAgent(originalFeed.UserAgent)
- feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
- feedProcessor.WithCrawler(originalFeed.Crawler)
- feedProcessor.Process()
+ originalFeed.Entries = updatedFeed.Entries
+ filter.Apply(h.store, originalFeed)
- // Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
- if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); storeErr != nil {
+ // We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
+ if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, !originalFeed.Crawler); storeErr != nil {
return storeErr
}
diff --git a/reader/processor/doc.go b/reader/filter/doc.go
index f0e7fd4..92c18c0 100644
--- a/reader/processor/doc.go
+++ b/reader/filter/doc.go
@@ -4,7 +4,7 @@
/*
-Package processor handles the logic to manipulate feed contents.
+Package filter applies a set of filters to feed entries.
*/
-package processor // import "miniflux.app/reader/processor"
+package filter // import "miniflux.app/reader/filter"
diff --git a/reader/filter/filter.go b/reader/filter/filter.go
new file mode 100644
index 0000000..a0b9429
--- /dev/null
+++ b/reader/filter/filter.go
@@ -0,0 +1,36 @@
+// Copyright 2018 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package filter
+
+import (
+ "miniflux.app/logger"
+ "miniflux.app/model"
+ "miniflux.app/reader/rewrite"
+ "miniflux.app/reader/sanitizer"
+ "miniflux.app/reader/scraper"
+ "miniflux.app/storage"
+)
+
+// Apply executes all entry filters.
+func Apply(store *storage.Storage, feed *model.Feed) {
+ for _, entry := range feed.Entries {
+ if feed.Crawler {
+ if !store.EntryURLExists(feed.UserID, entry.URL) {
+ content, err := scraper.Fetch(entry.URL, feed.ScraperRules, feed.UserAgent)
+ if err != nil {
+ logger.Error("Unable to crawl this entry: %q => %v", entry.URL, err)
+ } else {
+ // We replace the entry content only if the scraper doesn't return any error.
+ entry.Content = content
+ }
+ }
+ }
+
+ entry.Content = rewrite.Rewriter(entry.URL, entry.Content, feed.RewriteRules)
+
+ // The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
+ entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
+ }
+}
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
deleted file mode 100644
index f57e6cd..0000000
--- a/reader/processor/processor.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package processor // import "miniflux.app/reader/processor"
-
-import (
- "miniflux.app/logger"
- "miniflux.app/model"
- "miniflux.app/reader/rewrite"
- "miniflux.app/reader/sanitizer"
- "miniflux.app/reader/scraper"
- "miniflux.app/storage"
-)
-
-// FeedProcessor handles the processing of feed contents.
-type FeedProcessor struct {
- userID int64
- store *storage.Storage
- feed *model.Feed
- scraperRules string
- rewriteRules string
- crawler bool
- userAgent string
-}
-
-// WithCrawler enables the crawler.
-func (f *FeedProcessor) WithCrawler(value bool) {
- f.crawler = value
-}
-
-// WithScraperRules adds scraper rules to the processing.
-func (f *FeedProcessor) WithScraperRules(rules string) {
- f.scraperRules = rules
-}
-
-// WithUserAgent sets the User-Agent header for fetching article content.
-func (f *FeedProcessor) WithUserAgent(userAgent string) {
- f.userAgent = userAgent
-}
-
-// WithRewriteRules adds rewrite rules to the processing.
-func (f *FeedProcessor) WithRewriteRules(rules string) {
- f.rewriteRules = rules
-}
-
-// Process applies rewrite and scraper rules.
-func (f *FeedProcessor) Process() {
- for _, entry := range f.feed.Entries {
- if f.crawler {
- if f.store.EntryURLExists(f.userID, entry.URL) {
- logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
- } else {
- content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
- if err != nil {
- logger.Error("[FeedProcessor] %v", err)
- } else {
- entry.Content = content
- }
- }
- }
-
- entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
- entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
- }
-}
-
-// NewFeedProcessor returns a new FeedProcessor.
-func NewFeedProcessor(userID int64, store *storage.Storage, feed *model.Feed) *FeedProcessor {
- return &FeedProcessor{userID: userID, store: store, feed: feed, crawler: false}
-}