From 778346b0b04bc52c89529668b37c1086bebe1674 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Sun, 14 Oct 2018 21:43:48 -0700 Subject: Simplify feed fetcher - Add browser package to handle HTTP errors - Reduce code duplication --- reader/browser/browser.go | 55 ++++++++++++ reader/feed/handler.go | 189 ++++++++++++------------------------------ reader/subscription/finder.go | 44 ++-------- 3 files changed, 117 insertions(+), 171 deletions(-) create mode 100644 reader/browser/browser.go (limited to 'reader') diff --git a/reader/browser/browser.go b/reader/browser/browser.go new file mode 100644 index 0000000..733d5f5 --- /dev/null +++ b/reader/browser/browser.go @@ -0,0 +1,55 @@ +// Copyright 2018 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package browser // import "miniflux.app/reader/browser" + +import ( + "miniflux.app/errors" + "miniflux.app/http/client" +) + +var ( + errRequestFailed = "Unable to open this link: %v" + errServerFailure = "Unable to fetch this resource (Status Code = %d)" + errEncoding = "Unable to normalize encoding: %q" + errEmptyFeed = "This feed is empty" + errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL" + errNotAuthorized = "You are not authorized to access this resource (invalid username/password)" +) + +// Exec executes a HTTP request and handles errors. +func Exec(request *client.Client) (*client.Response, *errors.LocalizedError) { + response, err := request.Get() + if err != nil { + if e, ok := err.(*errors.LocalizedError); ok { + return nil, e + } + return nil, errors.NewLocalizedError(errRequestFailed, err) + } + + if response.IsNotFound() { + return nil, errors.NewLocalizedError(errResourceNotFound) + } + + if response.IsNotAuthorized() { + return nil, errors.NewLocalizedError(errNotAuthorized) + } + + if response.HasServerFailure() { + return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) + } + + if response.StatusCode != 304 { + // Content-Length = -1 when no Content-Length header is sent. + if response.ContentLength == 0 { + return nil, errors.NewLocalizedError(errEmptyFeed) + } + + if err := response.EnsureUnicodeBody(); err != nil { + return nil, errors.NewLocalizedError(errEncoding, err) + } + } + + return response, nil +} diff --git a/reader/feed/handler.go b/reader/feed/handler.go index 0945948..5c13dd4 100644 --- a/reader/feed/handler.go +++ b/reader/feed/handler.go @@ -13,6 +13,7 @@ import ( "miniflux.app/locale" "miniflux.app/logger" "miniflux.app/model" + "miniflux.app/reader/browser" "miniflux.app/reader/icon" "miniflux.app/reader/parser" "miniflux.app/reader/processor" @@ -21,14 +22,9 @@ import ( ) var ( - errRequestFailed = "Unable to execute request: %v" - errServerFailure = "Unable to fetch feed (Status Code = %d)" errDuplicate = "This feed already exists (%s)" errNotFound = "Feed %d not found" - errEncoding = "Unable to normalize encoding: %q" errCategoryNotFound = "Category not found for this user" - errEmptyFeed = "This feed is empty" - errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL" ) // Handler contains all the logic to create and refresh feeds. @@ -44,155 +40,79 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, return nil, errors.NewLocalizedError(errCategoryNotFound) } - clt := client.New(url) - clt.WithCredentials(username, password) - clt.WithUserAgent(userAgent) - response, err := clt.Get() - if err != nil { - if _, ok := err.(*errors.LocalizedError); ok { - return nil, err - } - return nil, errors.NewLocalizedError(errRequestFailed, err) - } - - if response.HasServerFailure() { - return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) - } - - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - return nil, errors.NewLocalizedError(errEmptyFeed) + request := client.New(url) + request.WithCredentials(username, password) + request.WithUserAgent(userAgent) + response, requestErr := browser.Exec(request) + if requestErr != nil { + return nil, requestErr } if h.store.FeedURLExists(userID, response.EffectiveURL) { return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL) } - if err := response.EnsureUnicodeBody(); err != nil { - return nil, errors.NewLocalizedError(errEncoding, err) + subscription, parseErr := parser.ParseFeed(response.String()) + if parseErr != nil { + return nil, parseErr } - subscription, feedErr := parser.ParseFeed(response.String()) - if feedErr != nil { - return nil, feedErr - } + subscription.UserID = userID + subscription.WithCategoryID(categoryID) + subscription.WithBrowsingParameters(crawler, userAgent, username, password) + subscription.WithClientResponse(response) + subscription.CheckedNow() feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) feedProcessor.WithCrawler(crawler) feedProcessor.Process() - subscription.Category = &model.Category{ID: categoryID} - subscription.EtagHeader = response.ETag - subscription.LastModifiedHeader = response.LastModified - subscription.FeedURL = response.EffectiveURL - subscription.UserID = userID - subscription.Crawler = crawler - subscription.UserAgent = userAgent - subscription.Username = username - subscription.Password = password - - if subscription.SiteURL == "" { - subscription.SiteURL = subscription.FeedURL - } - - err = h.store.CreateFeed(subscription) - if err != nil { - return nil, err + if storeErr := h.store.CreateFeed(subscription); storeErr != nil { + return nil, storeErr } logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID) - icon, err := icon.FindIcon(subscription.SiteURL) - if err != nil { - logger.Error("[Handler:CreateFeed] %v", err) - } else if icon == nil { - logger.Info("No icon found for feedID=%d", subscription.ID) - } else { - h.store.CreateFeedIcon(subscription, icon) - } - + checkFeedIcon(h.store, subscription.ID, subscription.SiteURL) return subscription, nil } // RefreshFeed fetch and update a feed if necessary. func (h *Handler) RefreshFeed(userID, feedID int64) error { defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID)) - userLanguage, err := h.store.UserLanguage(userID) - if err != nil { - logger.Error("[Handler:RefreshFeed] %v", err) - userLanguage = "en_US" - } - + userLanguage := h.store.UserLanguage(userID) printer := locale.NewPrinter(userLanguage) - originalFeed, err := h.store.FeedByID(userID, feedID) - if err != nil { - return err + originalFeed, storeErr := h.store.FeedByID(userID, feedID) + if storeErr != nil { + return storeErr } if originalFeed == nil { return errors.NewLocalizedError(errNotFound, feedID) } - clt := client.New(originalFeed.FeedURL) - clt.WithCredentials(originalFeed.Username, originalFeed.Password) - clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) - clt.WithUserAgent(originalFeed.UserAgent) - response, err := clt.Get() - if err != nil { - var customErr errors.LocalizedError - if lerr, ok := err.(*errors.LocalizedError); ok { - customErr = *lerr - } else { - customErr = *errors.NewLocalizedError(errRequestFailed, err) - } + originalFeed.CheckedNow() - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = customErr.Localize(printer) + request := client.New(originalFeed.FeedURL) + request.WithCredentials(originalFeed.Username, originalFeed.Password) + request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) + request.WithUserAgent(originalFeed.UserAgent) + response, requestErr := browser.Exec(request) + if requestErr != nil { + originalFeed.WithError(requestErr.Localize(printer)) h.store.UpdateFeed(originalFeed) - return customErr - } - - originalFeed.CheckedAt = time.Now() - - if response.IsNotFound() { - err := errors.NewLocalizedError(errResourceNotFound) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err - } - - if response.HasServerFailure() { - err := errors.NewLocalizedError(errServerFailure, response.StatusCode) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err + return requestErr } if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) { logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID) - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - err := errors.NewLocalizedError(errEmptyFeed) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err - } - - if err := response.EnsureUnicodeBody(); err != nil { - return errors.NewLocalizedError(errEncoding, err) - } - subscription, parseErr := parser.ParseFeed(response.String()) if parseErr != nil { - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = parseErr.Localize(printer) + originalFeed.WithError(parseErr.Localize(printer)) h.store.UpdateFeed(originalFeed) - return err + return parseErr } feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) @@ -202,34 +122,20 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { feedProcessor.WithCrawler(originalFeed.Crawler) feedProcessor.Process() - originalFeed.EtagHeader = response.ETag - originalFeed.LastModifiedHeader = response.LastModified - // Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). - if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); err != nil { - return err + if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); storeErr != nil { + return storeErr } - if !h.store.HasIcon(originalFeed.ID) { - logger.Debug("[Handler:RefreshFeed] Looking for feed icon") - icon, err := icon.FindIcon(originalFeed.SiteURL) - if err != nil { - logger.Debug("[Handler:RefreshFeed] %v", err) - } else { - h.store.CreateFeedIcon(originalFeed, icon) - } - } + // We update caching headers only if the feed has been modified, + // because some websites don't return the same headers when replying with a 304. + originalFeed.WithClientResponse(response) + checkFeedIcon(h.store, originalFeed.ID, originalFeed.SiteURL) } else { logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID) } - originalFeed.ParsingErrorCount = 0 - originalFeed.ParsingErrorMsg = "" - - if originalFeed.SiteURL == "" { - originalFeed.SiteURL = originalFeed.FeedURL - } - + originalFeed.ResetErrorCounter() return h.store.UpdateFeed(originalFeed) } @@ -237,3 +143,18 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { func NewFeedHandler(store *storage.Storage) *Handler { return &Handler{store} } + +func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL string) { + if !store.HasIcon(feedID) { + icon, err := icon.FindIcon(websiteURL) + if err != nil { + logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL) + } else if icon == nil { + logger.Debug("CheckFeedIcon: No icon found (feedID=%d websiteURL=%s)", feedID, websiteURL) + } else { + if err := store.CreateFeedIcon(feedID, icon); err != nil { + logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL) + } + } + } +} \ No newline at end of file diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go index c59c002..fab8d93 100644 --- a/reader/subscription/finder.go +++ b/reader/subscription/finder.go @@ -5,58 +5,29 @@ package subscription // import "miniflux.app/reader/subscription" import ( - "fmt" "io" "strings" - "time" "miniflux.app/errors" "miniflux.app/http/client" - "miniflux.app/logger" + "miniflux.app/reader/browser" "miniflux.app/reader/parser" - "miniflux.app/timer" "miniflux.app/url" "github.com/PuerkitoBio/goquery" ) var ( - errConnectionFailure = "Unable to open this link: %v" errUnreadableDoc = "Unable to analyze this page: %v" - errEmptyBody = "This web page is empty" - errNotAuthorized = "You are not authorized to access this resource (invalid username/password)" - errServerFailure = "Unable to fetch this resource (Status Code = %d)" ) // FindSubscriptions downloads and try to find one or more subscriptions from an URL. -func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) { - defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL)) - - clt := client.New(websiteURL) - clt.WithCredentials(username, password) - clt.WithUserAgent(userAgent) - response, err := clt.Get() +func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) { + request := client.New(websiteURL) + request.WithCredentials(username, password) + request.WithUserAgent(userAgent) + response, err := browser.Exec(request) if err != nil { - if _, ok := err.(errors.LocalizedError); ok { - return nil, err - } - return nil, errors.NewLocalizedError(errConnectionFailure, err) - } - - if response.IsNotAuthorized() { - return nil, errors.NewLocalizedError(errNotAuthorized) - } - - if response.HasServerFailure() { - return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) - } - - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - return nil, errors.NewLocalizedError(errEmptyBody) - } - - if err := response.EnsureUnicodeBody(); err != nil { return nil, err } @@ -75,7 +46,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr return parseDocument(response.EffectiveURL, strings.NewReader(body)) } -func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { +func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) { var subscriptions Subscriptions queries := map[string]string{ "link[type='application/rss+xml']": "rss", @@ -108,7 +79,6 @@ func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { } if subscription.URL != "" { - logger.Debug("[FindSubscriptions] %s", subscription) subscriptions = append(subscriptions, subscription) } }) -- cgit v1.2.3