From 778346b0b04bc52c89529668b37c1086bebe1674 Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Sun, 14 Oct 2018 21:43:48 -0700 Subject: Simplify feed fetcher - Add browser package to handle HTTP errors - Reduce code duplication --- model/feed.go | 45 +++++++++- model/feed_test.go | 101 ++++++++++++++++++++++ reader/browser/browser.go | 55 ++++++++++++ reader/feed/handler.go | 189 ++++++++++++------------------------------ reader/subscription/finder.go | 44 ++-------- storage/icon.go | 6 +- storage/user.go | 13 ++- ui/subscription_submit.go | 8 +- 8 files changed, 275 insertions(+), 186 deletions(-) create mode 100644 model/feed_test.go create mode 100644 reader/browser/browser.go diff --git a/model/feed.go b/model/feed.go index 0ace098..63957c6 100644 --- a/model/feed.go +++ b/model/feed.go @@ -7,9 +7,11 @@ package model // import "miniflux.app/model" import ( "fmt" "time" + + "miniflux.app/http/client" ) -// Feed represents a feed in the database. +// Feed represents a feed in the application. type Feed struct { ID int64 `json:"id"` UserID int64 `json:"user_id"` @@ -43,5 +45,46 @@ func (f *Feed) String() string { ) } +// WithClientResponse updates feed attributes from an HTTP request. +func (f *Feed) WithClientResponse(response *client.Response) { + f.EtagHeader = response.ETag + f.LastModifiedHeader = response.LastModified + f.FeedURL = response.EffectiveURL +} + +// WithCategoryID initializes the category attribute of the feed. +func (f *Feed) WithCategoryID(categoryID int64) { + f.Category = &Category{ID: categoryID} +} + +// WithBrowsingParameters defines browsing parameters. +func (f *Feed) WithBrowsingParameters(crawler bool, userAgent, username, password string) { + f.Crawler = crawler + f.UserAgent = userAgent + f.Username = username + f.Password = password +} + +// WithError adds a new error message and increment the error counter. +func (f *Feed) WithError(message string) { + f.ParsingErrorCount++ + f.ParsingErrorMsg = message +} + +// ResetErrorCounter removes all previous errors. +func (f *Feed) ResetErrorCounter() { + f.ParsingErrorCount = 0 + f.ParsingErrorMsg = "" +} + +// CheckedNow set attribute values when the feed is refreshed. +func (f *Feed) CheckedNow() { + f.CheckedAt = time.Now() + + if f.SiteURL == "" { + f.SiteURL = f.FeedURL + } +} + // Feeds is a list of feed type Feeds []*Feed diff --git a/model/feed_test.go b/model/feed_test.go new file mode 100644 index 0000000..3ca81b4 --- /dev/null +++ b/model/feed_test.go @@ -0,0 +1,101 @@ +// Copyright 2018 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package model // import "miniflux.app/reader/model" + +import ( + "testing" + + "miniflux.app/http/client" +) + +func TestFeedWithResponse(t *testing.T) { + response := &client.Response{ETag: "Some etag", LastModified: "Some date", EffectiveURL: "Some URL"} + + feed := &Feed{} + feed.WithClientResponse(response) + + if feed.EtagHeader != "Some etag" { + t.Fatal(`The ETag header should be set`) + } + + if feed.LastModifiedHeader != "Some date" { + t.Fatal(`The LastModified header should be set`) + } + + if feed.FeedURL != "Some URL" { + t.Fatal(`The Feed URL should be set`) + } +} + +func TestFeedCategorySetter(t *testing.T) { + feed := &Feed{} + feed.WithCategoryID(int64(123)) + + if feed.Category == nil { + t.Fatal(`The category field should not be null`) + } + + if feed.Category.ID != int64(123) { + t.Error(`The category ID must be set`) + } +} + +func TestFeedBrowsingParams(t *testing.T) { + feed := &Feed{} + feed.WithBrowsingParameters(true, "Custom User Agent", "Username", "Secret") + + if !feed.Crawler { + t.Error(`The crawler must be activated`) + } + + if feed.UserAgent != "Custom User Agent" { + t.Error(`The user agent must be set`) + } + + if feed.Username != "Username" { + t.Error(`The username must be set`) + } + + if feed.Password != "Secret" { + t.Error(`The password must be set`) + } +} + +func TestFeedErrorCounter(t *testing.T) { + feed := &Feed{} + feed.WithError("Some Error") + + if feed.ParsingErrorMsg != "Some Error" { + t.Error(`The error message must be set`) + } + + if feed.ParsingErrorCount != 1 { + t.Error(`The error counter must be set to 1`) + } + + feed.ResetErrorCounter() + + if feed.ParsingErrorMsg != "" { + t.Error(`The error message must be removed`) + } + + if feed.ParsingErrorCount != 0 { + t.Error(`The error counter must be set to 0`) + } +} + +func TestFeedCheckedNow(t *testing.T) { + feed := &Feed{} + feed.FeedURL = "https://example.org/feed" + feed.CheckedNow() + + if feed.SiteURL != feed.FeedURL { + t.Error(`The site URL must not be empty`) + } + + if feed.CheckedAt.IsZero() { + t.Error(`The checked date must be set`) + } +} diff --git a/reader/browser/browser.go b/reader/browser/browser.go new file mode 100644 index 0000000..733d5f5 --- /dev/null +++ b/reader/browser/browser.go @@ -0,0 +1,55 @@ +// Copyright 2018 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package browser // import "miniflux.app/reader/browser" + +import ( + "miniflux.app/errors" + "miniflux.app/http/client" +) + +var ( + errRequestFailed = "Unable to open this link: %v" + errServerFailure = "Unable to fetch this resource (Status Code = %d)" + errEncoding = "Unable to normalize encoding: %q" + errEmptyFeed = "This feed is empty" + errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL" + errNotAuthorized = "You are not authorized to access this resource (invalid username/password)" +) + +// Exec executes a HTTP request and handles errors. +func Exec(request *client.Client) (*client.Response, *errors.LocalizedError) { + response, err := request.Get() + if err != nil { + if e, ok := err.(*errors.LocalizedError); ok { + return nil, e + } + return nil, errors.NewLocalizedError(errRequestFailed, err) + } + + if response.IsNotFound() { + return nil, errors.NewLocalizedError(errResourceNotFound) + } + + if response.IsNotAuthorized() { + return nil, errors.NewLocalizedError(errNotAuthorized) + } + + if response.HasServerFailure() { + return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) + } + + if response.StatusCode != 304 { + // Content-Length = -1 when no Content-Length header is sent. + if response.ContentLength == 0 { + return nil, errors.NewLocalizedError(errEmptyFeed) + } + + if err := response.EnsureUnicodeBody(); err != nil { + return nil, errors.NewLocalizedError(errEncoding, err) + } + } + + return response, nil +} diff --git a/reader/feed/handler.go b/reader/feed/handler.go index 0945948..5c13dd4 100644 --- a/reader/feed/handler.go +++ b/reader/feed/handler.go @@ -13,6 +13,7 @@ import ( "miniflux.app/locale" "miniflux.app/logger" "miniflux.app/model" + "miniflux.app/reader/browser" "miniflux.app/reader/icon" "miniflux.app/reader/parser" "miniflux.app/reader/processor" @@ -21,14 +22,9 @@ import ( ) var ( - errRequestFailed = "Unable to execute request: %v" - errServerFailure = "Unable to fetch feed (Status Code = %d)" errDuplicate = "This feed already exists (%s)" errNotFound = "Feed %d not found" - errEncoding = "Unable to normalize encoding: %q" errCategoryNotFound = "Category not found for this user" - errEmptyFeed = "This feed is empty" - errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL" ) // Handler contains all the logic to create and refresh feeds. @@ -44,155 +40,79 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, return nil, errors.NewLocalizedError(errCategoryNotFound) } - clt := client.New(url) - clt.WithCredentials(username, password) - clt.WithUserAgent(userAgent) - response, err := clt.Get() - if err != nil { - if _, ok := err.(*errors.LocalizedError); ok { - return nil, err - } - return nil, errors.NewLocalizedError(errRequestFailed, err) - } - - if response.HasServerFailure() { - return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) - } - - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - return nil, errors.NewLocalizedError(errEmptyFeed) + request := client.New(url) + request.WithCredentials(username, password) + request.WithUserAgent(userAgent) + response, requestErr := browser.Exec(request) + if requestErr != nil { + return nil, requestErr } if h.store.FeedURLExists(userID, response.EffectiveURL) { return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL) } - if err := response.EnsureUnicodeBody(); err != nil { - return nil, errors.NewLocalizedError(errEncoding, err) + subscription, parseErr := parser.ParseFeed(response.String()) + if parseErr != nil { + return nil, parseErr } - subscription, feedErr := parser.ParseFeed(response.String()) - if feedErr != nil { - return nil, feedErr - } + subscription.UserID = userID + subscription.WithCategoryID(categoryID) + subscription.WithBrowsingParameters(crawler, userAgent, username, password) + subscription.WithClientResponse(response) + subscription.CheckedNow() feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) feedProcessor.WithCrawler(crawler) feedProcessor.Process() - subscription.Category = &model.Category{ID: categoryID} - subscription.EtagHeader = response.ETag - subscription.LastModifiedHeader = response.LastModified - subscription.FeedURL = response.EffectiveURL - subscription.UserID = userID - subscription.Crawler = crawler - subscription.UserAgent = userAgent - subscription.Username = username - subscription.Password = password - - if subscription.SiteURL == "" { - subscription.SiteURL = subscription.FeedURL - } - - err = h.store.CreateFeed(subscription) - if err != nil { - return nil, err + if storeErr := h.store.CreateFeed(subscription); storeErr != nil { + return nil, storeErr } logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID) - icon, err := icon.FindIcon(subscription.SiteURL) - if err != nil { - logger.Error("[Handler:CreateFeed] %v", err) - } else if icon == nil { - logger.Info("No icon found for feedID=%d", subscription.ID) - } else { - h.store.CreateFeedIcon(subscription, icon) - } - + checkFeedIcon(h.store, subscription.ID, subscription.SiteURL) return subscription, nil } // RefreshFeed fetch and update a feed if necessary. func (h *Handler) RefreshFeed(userID, feedID int64) error { defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID)) - userLanguage, err := h.store.UserLanguage(userID) - if err != nil { - logger.Error("[Handler:RefreshFeed] %v", err) - userLanguage = "en_US" - } - + userLanguage := h.store.UserLanguage(userID) printer := locale.NewPrinter(userLanguage) - originalFeed, err := h.store.FeedByID(userID, feedID) - if err != nil { - return err + originalFeed, storeErr := h.store.FeedByID(userID, feedID) + if storeErr != nil { + return storeErr } if originalFeed == nil { return errors.NewLocalizedError(errNotFound, feedID) } - clt := client.New(originalFeed.FeedURL) - clt.WithCredentials(originalFeed.Username, originalFeed.Password) - clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) - clt.WithUserAgent(originalFeed.UserAgent) - response, err := clt.Get() - if err != nil { - var customErr errors.LocalizedError - if lerr, ok := err.(*errors.LocalizedError); ok { - customErr = *lerr - } else { - customErr = *errors.NewLocalizedError(errRequestFailed, err) - } + originalFeed.CheckedNow() - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = customErr.Localize(printer) + request := client.New(originalFeed.FeedURL) + request.WithCredentials(originalFeed.Username, originalFeed.Password) + request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) + request.WithUserAgent(originalFeed.UserAgent) + response, requestErr := browser.Exec(request) + if requestErr != nil { + originalFeed.WithError(requestErr.Localize(printer)) h.store.UpdateFeed(originalFeed) - return customErr - } - - originalFeed.CheckedAt = time.Now() - - if response.IsNotFound() { - err := errors.NewLocalizedError(errResourceNotFound) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err - } - - if response.HasServerFailure() { - err := errors.NewLocalizedError(errServerFailure, response.StatusCode) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err + return requestErr } if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) { logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID) - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - err := errors.NewLocalizedError(errEmptyFeed) - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = err.Localize(printer) - h.store.UpdateFeed(originalFeed) - return err - } - - if err := response.EnsureUnicodeBody(); err != nil { - return errors.NewLocalizedError(errEncoding, err) - } - subscription, parseErr := parser.ParseFeed(response.String()) if parseErr != nil { - originalFeed.ParsingErrorCount++ - originalFeed.ParsingErrorMsg = parseErr.Localize(printer) + originalFeed.WithError(parseErr.Localize(printer)) h.store.UpdateFeed(originalFeed) - return err + return parseErr } feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) @@ -202,34 +122,20 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { feedProcessor.WithCrawler(originalFeed.Crawler) feedProcessor.Process() - originalFeed.EtagHeader = response.ETag - originalFeed.LastModifiedHeader = response.LastModified - // Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). - if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); err != nil { - return err + if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); storeErr != nil { + return storeErr } - if !h.store.HasIcon(originalFeed.ID) { - logger.Debug("[Handler:RefreshFeed] Looking for feed icon") - icon, err := icon.FindIcon(originalFeed.SiteURL) - if err != nil { - logger.Debug("[Handler:RefreshFeed] %v", err) - } else { - h.store.CreateFeedIcon(originalFeed, icon) - } - } + // We update caching headers only if the feed has been modified, + // because some websites don't return the same headers when replying with a 304. + originalFeed.WithClientResponse(response) + checkFeedIcon(h.store, originalFeed.ID, originalFeed.SiteURL) } else { logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID) } - originalFeed.ParsingErrorCount = 0 - originalFeed.ParsingErrorMsg = "" - - if originalFeed.SiteURL == "" { - originalFeed.SiteURL = originalFeed.FeedURL - } - + originalFeed.ResetErrorCounter() return h.store.UpdateFeed(originalFeed) } @@ -237,3 +143,18 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { func NewFeedHandler(store *storage.Storage) *Handler { return &Handler{store} } + +func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL string) { + if !store.HasIcon(feedID) { + icon, err := icon.FindIcon(websiteURL) + if err != nil { + logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL) + } else if icon == nil { + logger.Debug("CheckFeedIcon: No icon found (feedID=%d websiteURL=%s)", feedID, websiteURL) + } else { + if err := store.CreateFeedIcon(feedID, icon); err != nil { + logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL) + } + } + } +} \ No newline at end of file diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go index c59c002..fab8d93 100644 --- a/reader/subscription/finder.go +++ b/reader/subscription/finder.go @@ -5,58 +5,29 @@ package subscription // import "miniflux.app/reader/subscription" import ( - "fmt" "io" "strings" - "time" "miniflux.app/errors" "miniflux.app/http/client" - "miniflux.app/logger" + "miniflux.app/reader/browser" "miniflux.app/reader/parser" - "miniflux.app/timer" "miniflux.app/url" "github.com/PuerkitoBio/goquery" ) var ( - errConnectionFailure = "Unable to open this link: %v" errUnreadableDoc = "Unable to analyze this page: %v" - errEmptyBody = "This web page is empty" - errNotAuthorized = "You are not authorized to access this resource (invalid username/password)" - errServerFailure = "Unable to fetch this resource (Status Code = %d)" ) // FindSubscriptions downloads and try to find one or more subscriptions from an URL. -func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) { - defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL)) - - clt := client.New(websiteURL) - clt.WithCredentials(username, password) - clt.WithUserAgent(userAgent) - response, err := clt.Get() +func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) { + request := client.New(websiteURL) + request.WithCredentials(username, password) + request.WithUserAgent(userAgent) + response, err := browser.Exec(request) if err != nil { - if _, ok := err.(errors.LocalizedError); ok { - return nil, err - } - return nil, errors.NewLocalizedError(errConnectionFailure, err) - } - - if response.IsNotAuthorized() { - return nil, errors.NewLocalizedError(errNotAuthorized) - } - - if response.HasServerFailure() { - return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode) - } - - // Content-Length = -1 when no Content-Length header is sent - if response.ContentLength == 0 { - return nil, errors.NewLocalizedError(errEmptyBody) - } - - if err := response.EnsureUnicodeBody(); err != nil { return nil, err } @@ -75,7 +46,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr return parseDocument(response.EffectiveURL, strings.NewReader(body)) } -func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { +func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) { var subscriptions Subscriptions queries := map[string]string{ "link[type='application/rss+xml']": "rss", @@ -108,7 +79,6 @@ func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { } if subscription.URL != "" { - logger.Debug("[FindSubscriptions] %s", subscription) subscriptions = append(subscriptions, subscription) } }) diff --git a/storage/icon.go b/storage/icon.go index 3a3686d..aa9f99e 100644 --- a/storage/icon.go +++ b/storage/icon.go @@ -100,8 +100,8 @@ func (s *Storage) CreateIcon(icon *model.Icon) error { } // CreateFeedIcon creates an icon and associate the icon to the given feed. -func (s *Storage) CreateFeedIcon(feed *model.Feed, icon *model.Icon) error { - defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeedIcon] feedID=%d", feed.ID)) +func (s *Storage) CreateFeedIcon(feedID int64, icon *model.Icon) error { + defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeedIcon] feedID=%d", feedID)) err := s.IconByHash(icon) if err != nil { @@ -115,7 +115,7 @@ func (s *Storage) CreateFeedIcon(feed *model.Feed, icon *model.Icon) error { } } - _, err = s.db.Exec(`INSERT INTO feed_icons (feed_id, icon_id) VALUES ($1, $2)`, feed.ID, icon.ID) + _, err = s.db.Exec(`INSERT INTO feed_icons (feed_id, icon_id) VALUES ($1, $2)`, feedID, icon.ID) if err != nil { return fmt.Errorf("unable to create feed icon: %v", err) } diff --git a/storage/user.go b/storage/user.go index 9d584d5..e91e3b8 100644 --- a/storage/user.go +++ b/storage/user.go @@ -175,15 +175,14 @@ func (s *Storage) UpdateUser(user *model.User) error { } // UserLanguage returns the language of the given user. -func (s *Storage) UserLanguage(userID int64) (language string, err error) { +func (s *Storage) UserLanguage(userID int64) (language string) { defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:UserLanguage] userID=%d", userID)) - err = s.db.QueryRow(`SELECT language FROM users WHERE id = $1`, userID).Scan(&language) - if err == sql.ErrNoRows { - return "en_US", nil - } else if err != nil { - return "", fmt.Errorf("unable to fetch user language: %v", err) + err := s.db.QueryRow(`SELECT language FROM users WHERE id = $1`, userID).Scan(&language) + if err != nil { + return "en_US" } - return language, nil + + return language } // UserByID finds a user by the ID. diff --git a/ui/subscription_submit.go b/ui/subscription_submit.go index 801e516..bdf6a59 100644 --- a/ui/subscription_submit.go +++ b/ui/subscription_submit.go @@ -50,16 +50,16 @@ func (c *Controller) SubmitSubscription(w http.ResponseWriter, r *http.Request) return } - subscriptions, err := subscription.FindSubscriptions( + subscriptions, findErr := subscription.FindSubscriptions( subscriptionForm.URL, subscriptionForm.UserAgent, subscriptionForm.Username, subscriptionForm.Password, ) - if err != nil { - logger.Error("[Controller:SubmitSubscription] %v", err) + if findErr != nil { + logger.Error("[UI:SubmitSubscription] %s", findErr) v.Set("form", subscriptionForm) - v.Set("errorMessage", err) + v.Set("errorMessage", findErr) html.OK(w, r, v.Render("add_subscription")) return } -- cgit v1.2.3