aboutsummaryrefslogtreecommitdiffhomepage
path: root/reader/scraper
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <fred@miniflux.net>2017-12-10 19:01:38 -0800
committerGravatar Frédéric Guillot <fred@miniflux.net>2017-12-10 19:01:38 -0800
commit7a35c58f53d76356292e3e0ca9c91add3595a9e0 (patch)
tree99d2720d78049751d18033cb5ccafedc91f596c2 /reader/scraper
parentb75a9987ba99047efe846b8f196bc5a28b7474c1 (diff)
Add readability package to fetch original content
Diffstat (limited to 'reader/scraper')
-rw-r--r--reader/scraper/scraper.go38
1 files changed, 38 insertions, 0 deletions
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
new file mode 100644
index 0000000..6c51862
--- /dev/null
+++ b/reader/scraper/scraper.go
@@ -0,0 +1,38 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package scraper
+
+import (
+ "errors"
+
+ "github.com/miniflux/miniflux2/http"
+ "github.com/miniflux/miniflux2/reader/readability"
+ "github.com/miniflux/miniflux2/reader/sanitizer"
+)
+
+// Fetch downloads the web page at websiteURL, extracts its relevant content, and returns that content sanitized.
+func Fetch(websiteURL string) (string, error) {
+ client := http.NewClient(websiteURL)
+ response, err := client.Get()
+ if err != nil {
+ return "", err
+ }
+
+ if response.HasServerFailure() { // give up before touching the body of a failed response
+ return "", errors.New("unable to download web page")
+ }
+
+ page, err := response.NormalizeBodyEncoding() // NOTE(review): presumably re-encodes the body to a common charset; confirm in the http package
+ if err != nil {
+ return "", err
+ }
+
+ content, err := readability.ExtractContent(page)
+ if err != nil {
+ return "", err
+ }
+
+ return sanitizer.Sanitize(websiteURL, content), nil // the extracted content is sanitized before it reaches the caller
+}