From 2538eea1776e1d03d33465ad2001512caca93937 Mon Sep 17 00:00:00 2001
From: Patrick
Date: Thu, 20 Sep 2018 03:19:24 +0200
Subject: Add the possibility to override default user agent for each feed

---
 reader/feed/handler.go        | 6 +++++-
 reader/processor/processor.go | 8 +++++++-
 reader/scraper/scraper.go     | 6 +++++-
 reader/subscription/finder.go | 3 ++-
 4 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'reader')

diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index fa09cb6..252d178 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -37,7 +37,7 @@ type Handler struct {
 }
 
 // CreateFeed fetch, parse and store a new feed.
-func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, username, password string) (*model.Feed, error) {
+func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, userAgent, username, password string) (*model.Feed, error) {
 	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
 
 	if !h.store.CategoryExists(userID, categoryID) {
@@ -46,6 +46,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 
 	clt := client.New(url)
 	clt.WithCredentials(username, password)
+	clt.WithUserAgent(userAgent)
 	response, err := clt.Get()
 	if err != nil {
 		if _, ok := err.(*errors.LocalizedError); ok {
@@ -87,6 +88,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 	subscription.FeedURL = response.EffectiveURL
 	subscription.UserID = userID
 	subscription.Crawler = crawler
+	subscription.UserAgent = userAgent
 	subscription.Username = username
 	subscription.Password = password
 
@@ -136,6 +138,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	clt := client.New(originalFeed.FeedURL)
 	clt.WithCredentials(originalFeed.Username, originalFeed.Password)
 	clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
+	clt.WithUserAgent(originalFeed.UserAgent)
 	response, err := clt.Get()
 	if err != nil {
 		var customErr errors.LocalizedError
@@ -196,6 +199,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 
 		feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
 		feedProcessor.WithScraperRules(originalFeed.ScraperRules)
+		feedProcessor.WithUserAgent(originalFeed.UserAgent)
 		feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
 		feedProcessor.WithCrawler(originalFeed.Crawler)
 		feedProcessor.Process()
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
index 002f7e8..f57e6cd 100644
--- a/reader/processor/processor.go
+++ b/reader/processor/processor.go
@@ -21,6 +21,7 @@ type FeedProcessor struct {
 	scraperRules string
 	rewriteRules string
 	crawler      bool
+	userAgent    string
 }
 
 // WithCrawler enables the crawler.
@@ -33,6 +34,11 @@ func (f *FeedProcessor) WithScraperRules(rules string) {
 	f.scraperRules = rules
 }
 
+// WithUserAgent sets the User-Agent header for fetching article content.
+func (f *FeedProcessor) WithUserAgent(userAgent string) {
+	f.userAgent = userAgent
+}
+
 // WithRewriteRules adds rewrite rules to the processing.
 func (f *FeedProcessor) WithRewriteRules(rules string) {
 	f.rewriteRules = rules
@@ -45,7 +51,7 @@ func (f *FeedProcessor) Process() {
 			if f.store.EntryURLExists(f.userID, entry.URL) {
 				logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
 			} else {
-				content, err := scraper.Fetch(entry.URL, f.scraperRules)
+				content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
 				if err != nil {
 					logger.Error("[FeedProcessor] %v", err)
 				} else {
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
index d2cccdb..7aa7084 100644
--- a/reader/scraper/scraper.go
+++ b/reader/scraper/scraper.go
@@ -19,8 +19,12 @@ import (
 )
 
 // Fetch downloads a web page a returns relevant contents.
-func Fetch(websiteURL, rules string) (string, error) {
+func Fetch(websiteURL, rules, userAgent string) (string, error) {
 	clt := client.New(websiteURL)
+	if userAgent != "" {
+		clt.WithUserAgent(userAgent)
+	}
+
 	response, err := clt.Get()
 	if err != nil {
 		return "", err
diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go
index 8be6f73..027e810 100644
--- a/reader/subscription/finder.go
+++ b/reader/subscription/finder.go
@@ -29,11 +29,12 @@ var (
 )
 
 // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
-func FindSubscriptions(websiteURL, username, password string) (Subscriptions, error) {
+func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) {
 	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))
 
 	clt := client.New(websiteURL)
 	clt.WithCredentials(username, password)
+	clt.WithUserAgent(userAgent)
 	response, err := clt.Get()
 	if err != nil {
 		if _, ok := err.(errors.LocalizedError); ok {
-- 
cgit v1.2.3