diff options
Diffstat (limited to 'vendor/github.com/PuerkitoBio')
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/.travis.yml | 16 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/README.md | 57 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/array.go | 25 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/array_test.go | 22 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/bench_filter_test.go | 2 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/example_test.go | 54 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/expand.go | 24 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/expand_test.go | 22 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/manipulation.go | 7 | ||||
-rwxr-xr-x | vendor/github.com/PuerkitoBio/goquery/misc/git/pre-commit | 15 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/query.go | 6 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/type.go | 12 |
12 files changed, 224 insertions, 38 deletions
diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml index bc1e7cb..b019a93 100644 --- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml +++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml @@ -2,10 +2,14 @@ language: go go: - 1.1 - - 1.2 - - 1.3 - - 1.4 - - 1.5 - - 1.6 - - 1.7 + - 1.2.x + - 1.3.x + - 1.4.x + - 1.5.x + - 1.6.x + - 1.7.x + - 1.8.x + - 1.9.x + - "1.10.x" - tip + diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md index ce09cb9..5e899d4 100644 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ b/vendor/github.com/PuerkitoBio/goquery/README.md @@ -1,4 +1,5 @@ -# goquery - a little like that j-thing, only in Go [![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.png)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) +# goquery - a little like that j-thing, only in Go +[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. 
@@ -6,6 +7,16 @@ Also, because the net/html parser requires UTF-8 encoding, so does goquery: it i Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...). +## Table of Contents + +* [Installation](#installation) +* [Changelog](#changelog) +* [API](#api) +* [Examples](#examples) +* [Related Projects](#related-projects) +* [Support](#support) +* [License](#license) + ## Installation Please note that because of the net/html dependency, goquery requires Go1.1+. @@ -26,6 +37,10 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. **Note that goquery's API is now stable, and will not break.** +* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. +* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. +* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue). +* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins). * **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv). * **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb). * **2016-08-28 (v1.0.1)** : Optimize performance for large documents. @@ -81,12 +96,24 @@ package main import ( "fmt" "log" + "net/http" "github.com/PuerkitoBio/goquery" ) func ExampleScrape() { - doc, err := goquery.NewDocument("http://metalsucks.net") + // Request the HTML page. 
+ res, err := http.Get("http://metalsucks.net") + if err != nil { + log.Fatal(err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + log.Fatalf("status code error: %d %s", res.StatusCode, res.Status) + } + + // Load the HTML document + doc, err := goquery.NewDocumentFromReader(res.Body) if err != nil { log.Fatal(err) } @@ -105,6 +132,30 @@ func main() { } ``` +## Related Projects + +- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. +- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. +- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. +- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework +- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. + +## Support + +There are a number of ways you can support the project: + +* Use it, star it, build something with it, spread the word! + - If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section! +* Raise issues to improve the project (note: doc typos and clarifications are issues too!) + - Please search existing issues before opening a new one - it may have already been addressed. +* Pull requests: please discuss new code in an issue first, unless the fix is really trivial. + - Make sure new code is tested. + - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. 
+ +If you desperately want to send money my way, I have a BuyMeACoffee.com page: + +<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a> + ## License The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic]. @@ -112,6 +163,7 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' [jquery]: http://jquery.com/ [go]: http://golang.org/ [cascadia]: https://github.com/andybalholm/cascadia +[cascadiacli]: https://github.com/suntong/cascadia [bsd]: http://opensource.org/licenses/BSD-3-Clause [golic]: http://golang.org/LICENSE [caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE @@ -122,3 +174,4 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' [wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks [thatguystone]: https://github.com/thatguystone [piotr]: https://github.com/piotrkowalczuk +[goq]: https://github.com/andrewstuart/goq diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go index d7af5ee..1b1f6cb 100644 --- a/vendor/github.com/PuerkitoBio/goquery/array.go +++ b/vendor/github.com/PuerkitoBio/goquery/array.go @@ -4,6 +4,16 @@ import ( "golang.org/x/net/html" ) +const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + + // ToEnd is a special index value that can be used as end index in a call + // to Slice so that all elements are selected until the end of the Selection. + // It is equivalent to passing (*Selection).Length(). + ToEnd = maxInt +) + // First reduces the set of matched elements to the first in the set. 
// It returns a new Selection object, and an empty Selection object if the // the selection is empty. @@ -35,12 +45,23 @@ func (s *Selection) Eq(index int) *Selection { } // Slice reduces the set of matched elements to a subset specified by a range -// of indices. +// of indices. The start index is 0-based and indicates the index of the first +// element to select. The end index is 0-based and indicates the index at which +// the elements stop being selected (the end index is not selected). +// +// The indices may be negative, in which case they represent an offset from the +// end of the selection. +// +// The special value ToEnd may be specified as end index, in which case all elements +// until the end are selected. This works both for a positive and negative start +// index. func (s *Selection) Slice(start, end int) *Selection { if start < 0 { start += len(s.Nodes) } - if end < 0 { + if end == ToEnd { + end = len(s.Nodes) + } else if end < 0 { end += len(s.Nodes) } return pushStack(s, s.Nodes[start:end]) diff --git a/vendor/github.com/PuerkitoBio/goquery/array_test.go b/vendor/github.com/PuerkitoBio/goquery/array_test.go index 8e50f75..7857b38 100644 --- a/vendor/github.com/PuerkitoBio/goquery/array_test.go +++ b/vendor/github.com/PuerkitoBio/goquery/array_test.go @@ -98,6 +98,17 @@ func TestSlice(t *testing.T) { sel := Doc().Find(".pvk-content").Slice(0, 2) assertLength(t, sel.Nodes, 2) + assertSelectionIs(t, sel, "#pc1", "#pc2") +} + +func TestSliceToEnd(t *testing.T) { + sel := Doc().Find(".pvk-content").Slice(1, ToEnd) + + assertLength(t, sel.Nodes, 2) + assertSelectionIs(t, sel.Eq(0), "#pc2") + if _, ok := sel.Eq(1).Attr("id"); ok { + t.Error("Want no attribute ID, got one") + } } func TestSliceEmpty(t *testing.T) { @@ -110,6 +121,11 @@ func TestSliceInvalid(t *testing.T) { Doc().Find("").Slice(0, 2) } +func TestSliceInvalidToEnd(t *testing.T) { + defer assertPanic(t) + Doc().Find("").Slice(2, ToEnd) +} + func TestSliceOutOfBounds(t *testing.T) { defer 
assertPanic(t) Doc().Find(".pvk-content").Slice(2, 12) @@ -135,6 +151,12 @@ func TestNegativeSliceBoth(t *testing.T) { assertSelectionIs(t, sel.Eq(1), "#cf3") } +func TestNegativeSliceToEnd(t *testing.T) { + sel := Doc().Find(".container-fluid").Slice(-3, ToEnd) + assertLength(t, sel.Nodes, 3) + assertSelectionIs(t, sel, "#cf2", "#cf3", "#cf4") +} + func TestNegativeSliceOutOfBounds(t *testing.T) { defer assertPanic(t) Doc().Find(".container-fluid").Slice(-12, -7) diff --git a/vendor/github.com/PuerkitoBio/goquery/bench_filter_test.go b/vendor/github.com/PuerkitoBio/goquery/bench_filter_test.go index 61641c8..38e39f5 100644 --- a/vendor/github.com/PuerkitoBio/goquery/bench_filter_test.go +++ b/vendor/github.com/PuerkitoBio/goquery/bench_filter_test.go @@ -231,6 +231,6 @@ func BenchmarkEnd(b *testing.B) { } } if n != 373 { - b.Fatalf("wnat 373, got %d", n) + b.Fatalf("want 373, got %d", n) } } diff --git a/vendor/github.com/PuerkitoBio/goquery/example_test.go b/vendor/github.com/PuerkitoBio/goquery/example_test.go index 17b2354..bc97f05 100644 --- a/vendor/github.com/PuerkitoBio/goquery/example_test.go +++ b/vendor/github.com/PuerkitoBio/goquery/example_test.go @@ -3,14 +3,27 @@ package goquery_test import ( "fmt" "log" + "net/http" + "os" + "strings" "github.com/PuerkitoBio/goquery" ) // This example scrapes the reviews shown on the home page of metalsucks.net. func Example() { + // Request the HTML page. + res, err := http.Get("http://metalsucks.net") + if err != nil { + log.Fatal(err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + log.Fatalf("status code error: %d %s", res.StatusCode, res.Status) + } + // Load the HTML document - doc, err := goquery.NewDocument("http://metalsucks.net") + doc, err := goquery.NewDocumentFromReader(res.Body) if err != nil { log.Fatal(err) } @@ -28,3 +41,42 @@ func Example() { // xOutput: voluntarily fail the Example output. } + +// This example shows how to use NewDocumentFromReader from a file. 
+func ExampleNewDocumentFromReader_file() { + // create from a file + f, err := os.Open("some/file.html") + if err != nil { + log.Fatal(err) + } + defer f.Close() + doc, err := goquery.NewDocumentFromReader(f) + if err != nil { + log.Fatal(err) + } + // use the goquery document... + _ = doc.Find("h1") +} + +// This example shows how to use NewDocumentFromReader from a string. +func ExampleNewDocumentFromReader_string() { + // create from a string + data := ` +<html> + <head> + <title>My document</title> + </head> + <body> + <h1>Header</h1> + </body> +</html>` + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(data)) + if err != nil { + log.Fatal(err) + } + header := doc.Find("h1").Text() + fmt.Println(header) + + // Output: Header +} diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go index f0c6c86..7caade5 100644 --- a/vendor/github.com/PuerkitoBio/goquery/expand.go +++ b/vendor/github.com/PuerkitoBio/goquery/expand.go @@ -41,6 +41,30 @@ func (s *Selection) AddNodes(nodes ...*html.Node) *Selection { // AndSelf adds the previous set of elements on the stack to the current set. // It returns a new Selection object containing the current Selection combined // with the previous one. +// Deprecated: This function has been deprecated and is now an alias for AddBack(). func (s *Selection) AndSelf() *Selection { + return s.AddBack() +} + +// AddBack adds the previous set of elements on the stack to the current set. +// It returns a new Selection object containing the current Selection combined +// with the previous one. +func (s *Selection) AddBack() *Selection { return s.AddSelection(s.prevSel) } + +// AddBackFiltered reduces the previous set of elements on the stack to those that +// match the selector string, and adds them to the current set. 
+// It returns a new Selection object containing the current Selection combined +// with the filtered previous one +func (s *Selection) AddBackFiltered(selector string) *Selection { + return s.AddSelection(s.prevSel.Filter(selector)) +} + +// AddBackMatcher reduces the previous set of elements on the stack to those that match +// the matcher, and adds them to the current set. +// It returns a new Selection object containing the current Selection combined +// with the filtered previous one +func (s *Selection) AddBackMatcher(m Matcher) *Selection { + return s.AddSelection(s.prevSel.FilterMatcher(m)) +} diff --git a/vendor/github.com/PuerkitoBio/goquery/expand_test.go b/vendor/github.com/PuerkitoBio/goquery/expand_test.go index 4557025..c034dc6 100644 --- a/vendor/github.com/PuerkitoBio/goquery/expand_test.go +++ b/vendor/github.com/PuerkitoBio/goquery/expand_test.go @@ -94,3 +94,25 @@ func TestAndSelfRollback(t *testing.T) { sel2 := sel.Find("a").AndSelf().End().End() assertEqual(t, sel, sel2) } + +func TestAddBack(t *testing.T) { + sel := Doc().Find(".span12").Last().AddBack() + assertLength(t, sel.Nodes, 2) +} + +func TestAddBackRollback(t *testing.T) { + sel := Doc().Find(".pvk-content") + sel2 := sel.Find("a").AddBack().End().End() + assertEqual(t, sel, sel2) +} + +func TestAddBackFiltered(t *testing.T) { + sel := Doc().Find(".span12, .footer").Find("h1").AddBackFiltered(".footer") + assertLength(t, sel.Nodes, 2) +} + +func TestAddBackFilteredRollback(t *testing.T) { + sel := Doc().Find(".span12, .footer") + sel2 := sel.Find("h1").AddBackFiltered(".footer").End().End() + assertEqual(t, sel, sel2) +} diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go index ebb4ffe..34eb757 100644 --- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go +++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go @@ -270,13 +270,14 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { return 
s.Remove() } -// Set the html content of each element in the selection to specified html string. +// SetHtml sets the html content of each element in the selection to +// specified html string. func (s *Selection) SetHtml(html string) *Selection { return setHtmlNodes(s, parseHtml(html)...) } -// Set the content of each element in the selection to specified content. The -// provided text string is escaped. +// SetText sets the content of each element in the selection to specified content. +// The provided text string is escaped. func (s *Selection) SetText(text string) *Selection { return s.SetHtml(html.EscapeString(text)) } diff --git a/vendor/github.com/PuerkitoBio/goquery/misc/git/pre-commit b/vendor/github.com/PuerkitoBio/goquery/misc/git/pre-commit index 6a3d798..78e4331 100755 --- a/vendor/github.com/PuerkitoBio/goquery/misc/git/pre-commit +++ b/vendor/github.com/PuerkitoBio/goquery/misc/git/pre-commit @@ -18,21 +18,6 @@ gosimple $(go list ./... | grep -v /vendor/) echo "<<< gosimple" echo -echo ">>> staticcheck" -staticcheck $(go list ./... | grep -v /vendor/) -echo "<<< staticcheck" -echo - -echo ">>> unused" -unused $(go list ./... | grep -v /vendor/) -echo "<<< unused" -echo - -echo ">>> gas" -gas $(find . -name "*.go" | grep -v /vendor/ | grep -v '_test.go$') -echo "<<< gas" -echo - # Check for gofmt problems and report if any. gofiles=$(git diff --cached --name-only --diff-filter=ACM | grep '.go$' | grep -v /vendor/) [ -z "$gofiles" ] && echo "EXIT $vetres" && exit $vetres diff --git a/vendor/github.com/PuerkitoBio/goquery/query.go b/vendor/github.com/PuerkitoBio/goquery/query.go index 1a7f873..fe86bf0 100644 --- a/vendor/github.com/PuerkitoBio/goquery/query.go +++ b/vendor/github.com/PuerkitoBio/goquery/query.go @@ -5,11 +5,7 @@ import "golang.org/x/net/html" // Is checks the current matched set of elements against a selector and // returns true if at least one of these elements matches. 
func (s *Selection) Is(selector string) bool { - if len(s.Nodes) > 0 { - return s.IsMatcher(compileMatcher(selector)) - } - - return false + return s.IsMatcher(compileMatcher(selector)) } // IsMatcher checks the current matched set of elements against a matcher and diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go index e2169fa..6ad51db 100644 --- a/vendor/github.com/PuerkitoBio/goquery/type.go +++ b/vendor/github.com/PuerkitoBio/goquery/type.go @@ -31,6 +31,10 @@ func NewDocumentFromNode(root *html.Node) *Document { // NewDocument is a Document constructor that takes a string URL as argument. // It loads the specified document, parses it, and stores the root Document // node, ready to be manipulated. +// +// Deprecated: Use the net/http standard library package to make the request +// and validate the response before calling goquery.NewDocumentFromReader +// with the response's body. func NewDocument(url string) (*Document, error) { // Load the URL res, e := http.Get(url) @@ -40,10 +44,10 @@ func NewDocument(url string) (*Document, error) { return NewDocumentFromResponse(res) } -// NewDocumentFromReader returns a Document from a generic reader. +// NewDocumentFromReader returns a Document from an io.Reader. // It returns an error as second value if the reader's data cannot be parsed -// as html. It does *not* check if the reader is also an io.Closer, so the -// provided reader is never closed by this call, it is the responsibility +// as html. It does not check if the reader is also an io.Closer, the +// provided reader is never closed by this call. It is the responsibility // of the caller to close it if required. func NewDocumentFromReader(r io.Reader) (*Document, error) { root, e := html.Parse(r) @@ -56,6 +60,8 @@ func NewDocumentFromReader(r io.Reader) (*Document, error) { // NewDocumentFromResponse is another Document constructor that takes an http response as argument. 
// It loads the specified response's document, parses it, and stores the root Document // node, ready to be manipulated. The response's body is closed on return. +// +// Deprecated: Use goquery.NewDocumentFromReader with the response's body. func NewDocumentFromResponse(res *http.Response) (*Document, error) { if res == nil { return nil, errors.New("Response is nil") |