aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/scraper
diff options
context:
space:
mode:
Diffstat (limited to 'reader/scraper')
-rw-r--r-- reader/scraper/scraper.go 38
1 files changed, 38 insertions, 0 deletions
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
new file mode 100644
index 0000000..6c51862
--- /dev/null
+++ b/reader/scraper/scraper.go
@@ -0,0 +1,38 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package scraper
+
+import (
+ "errors"
+
+ "github.com/miniflux/miniflux2/http"
+ "github.com/miniflux/miniflux2/reader/readability"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+)
+
+// Fetch downloads the web page at websiteURL and returns its extracted,
+// sanitized content, or an error when the download or extraction fails.
+func Fetch(websiteURL string) (string, error) {
+	fetcher := http.NewClient(websiteURL)
+	resp, err := fetcher.Get()
+	switch {
+	case err != nil:
+		return "", err
+	case resp.HasServerFailure():
+		return "", errors.New("unable to download web page")
+	}
+
+	body, err := resp.NormalizeBodyEncoding()
+	if err != nil {
+		return "", err
+	}
+
+	article, err := readability.ExtractContent(body)
+	if err != nil {
+		return "", err
+	}
+
+	return sanitizer.Sanitize(websiteURL, article), nil
+}