From b8f874a37d5ce57fb139e857b5cbd2276da46714 Mon Sep 17 00:00:00 2001
From: Frédéric Guillot
Date: Sun, 14 Oct 2018 22:33:19 -0700
Subject: Simplify feed entries filtering

- Rename processor package to filter
- Remove boilerplate code
---
 reader/feed/handler.go        | 20 +++++-------
 reader/filter/doc.go          | 10 ++++++
 reader/filter/filter.go       | 36 ++++++++++++++++++++++
 reader/processor/doc.go       | 10 ------
 reader/processor/processor.go | 71 -------------------------------------------
 5 files changed, 53 insertions(+), 94 deletions(-)
 create mode 100644 reader/filter/doc.go
 create mode 100644 reader/filter/filter.go
 delete mode 100644 reader/processor/doc.go
 delete mode 100644 reader/processor/processor.go
(limited to 'reader')

diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index 5c13dd4..d48a80a 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -14,9 +14,9 @@ import (
 	"miniflux.app/logger"
 	"miniflux.app/model"
 	"miniflux.app/reader/browser"
+	"miniflux.app/reader/filter"
 	"miniflux.app/reader/icon"
 	"miniflux.app/reader/parser"
-	"miniflux.app/reader/processor"
 	"miniflux.app/storage"
 	"miniflux.app/timer"
 )
@@ -63,9 +63,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 	subscription.WithClientResponse(response)
 	subscription.CheckedNow()

-	feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
-	feedProcessor.WithCrawler(crawler)
-	feedProcessor.Process()
+	filter.Apply(h.store, subscription)

 	if storeErr := h.store.CreateFeed(subscription); storeErr != nil {
 		return nil, storeErr
@@ -108,22 +106,18 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
 		logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)

-		subscription, parseErr := parser.ParseFeed(response.String())
+		updatedFeed, parseErr := parser.ParseFeed(response.String())
 		if parseErr != nil {
 			originalFeed.WithError(parseErr.Localize(printer))
 			h.store.UpdateFeed(originalFeed)
 			return parseErr
 		}

-		feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
-		feedProcessor.WithScraperRules(originalFeed.ScraperRules)
-		feedProcessor.WithUserAgent(originalFeed.UserAgent)
-		feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
-		feedProcessor.WithCrawler(originalFeed.Crawler)
-		feedProcessor.Process()
+		originalFeed.Entries = updatedFeed.Entries
+		filter.Apply(h.store, originalFeed)

-		// Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
-		if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); storeErr != nil {
+		// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
+		if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, !originalFeed.Crawler); storeErr != nil {
 			return storeErr
 		}

diff --git a/reader/filter/doc.go b/reader/filter/doc.go
new file mode 100644
index 0000000..92c18c0
--- /dev/null
+++ b/reader/filter/doc.go
@@ -0,0 +1,10 @@
+// Copyright 2018 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+/*
+
+Package filter applies a set of filters to feed entries.
+
+*/
+package filter // import "miniflux.app/reader/filter"
diff --git a/reader/filter/filter.go b/reader/filter/filter.go
new file mode 100644
index 0000000..a0b9429
--- /dev/null
+++ b/reader/filter/filter.go
@@ -0,0 +1,36 @@
+// Copyright 2018 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package filter
+
+import (
+	"miniflux.app/logger"
+	"miniflux.app/model"
+	"miniflux.app/reader/rewrite"
+	"miniflux.app/reader/sanitizer"
+	"miniflux.app/reader/scraper"
+	"miniflux.app/storage"
+)
+
+// Apply executes all entry filters.
+func Apply(store *storage.Storage, feed *model.Feed) {
+	for _, entry := range feed.Entries {
+		if feed.Crawler {
+			if !store.EntryURLExists(feed.UserID, entry.URL) {
+				content, err := scraper.Fetch(entry.URL, feed.ScraperRules, feed.UserAgent)
+				if err != nil {
+					logger.Error("Unable to crawl this entry: %q => %v", entry.URL, err)
+				} else {
+					// We replace the entry content only if the scraper doesn't return any error.
+					entry.Content = content
+				}
+			}
+		}
+
+		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, feed.RewriteRules)
+
+		// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
+		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
+	}
+}
diff --git a/reader/processor/doc.go b/reader/processor/doc.go
deleted file mode 100644
index f0e7fd4..0000000
--- a/reader/processor/doc.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2018 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-/*
-
-Package processor handles the logic to manipulate feed contents.
-
-*/
-package processor // import "miniflux.app/reader/processor"
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
deleted file mode 100644
index f57e6cd..0000000
--- a/reader/processor/processor.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2017 Frédéric Guillot. All rights reserved.
-// Use of this source code is governed by the Apache 2.0
-// license that can be found in the LICENSE file.
-
-package processor // import "miniflux.app/reader/processor"
-
-import (
-	"miniflux.app/logger"
-	"miniflux.app/model"
-	"miniflux.app/reader/rewrite"
-	"miniflux.app/reader/sanitizer"
-	"miniflux.app/reader/scraper"
-	"miniflux.app/storage"
-)
-
-// FeedProcessor handles the processing of feed contents.
-type FeedProcessor struct {
-	userID       int64
-	store        *storage.Storage
-	feed         *model.Feed
-	scraperRules string
-	rewriteRules string
-	crawler      bool
-	userAgent    string
-}
-
-// WithCrawler enables the crawler.
-func (f *FeedProcessor) WithCrawler(value bool) {
-	f.crawler = value
-}
-
-// WithScraperRules adds scraper rules to the processing.
-func (f *FeedProcessor) WithScraperRules(rules string) {
-	f.scraperRules = rules
-}
-
-// WithUserAgent sets the User-Agent header for fetching article content.
-func (f *FeedProcessor) WithUserAgent(userAgent string) {
-	f.userAgent = userAgent
-}
-
-// WithRewriteRules adds rewrite rules to the processing.
-func (f *FeedProcessor) WithRewriteRules(rules string) {
-	f.rewriteRules = rules
-}
-
-// Process applies rewrite and scraper rules.
-func (f *FeedProcessor) Process() {
-	for _, entry := range f.feed.Entries {
-		if f.crawler {
-			if f.store.EntryURLExists(f.userID, entry.URL) {
-				logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
-			} else {
-				content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
-				if err != nil {
-					logger.Error("[FeedProcessor] %v", err)
-				} else {
-					entry.Content = content
-				}
-			}
-		}
-
-		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
-		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
-	}
-}
-
-// NewFeedProcessor returns a new FeedProcessor.
-func NewFeedProcessor(userID int64, store *storage.Storage, feed *model.Feed) *FeedProcessor {
-	return &FeedProcessor{userID: userID, store: store, feed: feed, crawler: false}
-}
--
cgit v1.2.3
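
The net effect on call sites is that the builder-style FeedProcessor setup collapses into a single function call. A minimal before/after sketch in Go; here store and feed stand in for the *storage.Storage and *model.Feed values a caller already holds, and everything else mirrors the identifiers in the patch above:

	// Before: callers configured a FeedProcessor option by option
	// before running it.
	feedProcessor := processor.NewFeedProcessor(userID, store, feed)
	feedProcessor.WithScraperRules(feed.ScraperRules)
	feedProcessor.WithUserAgent(feed.UserAgent)
	feedProcessor.WithRewriteRules(feed.RewriteRules)
	feedProcessor.WithCrawler(feed.Crawler)
	feedProcessor.Process()

	// After: filter.Apply reads the scraper, rewrite, and crawler settings
	// directly from the feed model, so callers pass only the storage handle
	// and the feed whose entries should be filtered.
	filter.Apply(store, feed)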