about | summary | refs | log | tree | commit | diff | homepage
path: root/reader/scraper/scraper.go
diff options
context:
space:
mode:
Diffstat (limited to 'reader/scraper/scraper.go')
-rw-r--r-- reader/scraper/scraper.go 8
1 files changed, 7 insertions, 1 deletions
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
index b62d1ca..58f37d5 100644
--- a/reader/scraper/scraper.go
+++ b/reader/scraper/scraper.go
@@ -34,7 +34,7 @@ func Fetch(websiteURL, rules, userAgent string) (string, error) {
return "", errors.New("scraper: unable to download web page")
}
- if !strings.Contains(response.ContentType, "text/html") {
+ if !isWhitelistedContentType(response.ContentType) {
return "", fmt.Errorf("scraper: this resource is not a HTML document (%s)", response.ContentType)
}
@@ -99,3 +99,9 @@ func getPredefinedScraperRules(websiteURL string) string {
return ""
}
+
+func isWhitelistedContentType(contentType string) bool {
+ contentType = strings.ToLower(contentType)
+ return strings.HasPrefix(contentType, "text/html") ||
+ strings.HasPrefix(contentType, "application/xhtml+xml")
+}