diff options
Diffstat (limited to 'vendor/github.com/andybalholm/cascadia')
71 files changed, 2372 insertions, 0 deletions
diff --git a/vendor/github.com/andybalholm/cascadia/.travis.yml b/vendor/github.com/andybalholm/cascadia/.travis.yml new file mode 100644 index 0000000..6f22751 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/.travis.yml @@ -0,0 +1,14 @@ +language: go + +go: + - 1.3 + - 1.4 + +install: + - go get github.com/andybalholm/cascadia + +script: + - go test -v + +notifications: + email: false diff --git a/vendor/github.com/andybalholm/cascadia/LICENSE b/vendor/github.com/andybalholm/cascadia/LICENSE new file mode 100755 index 0000000..ee5ad35 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2011 Andy Balholm. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md new file mode 100644 index 0000000..9021cb9 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/README.md @@ -0,0 +1,7 @@ +# cascadia + +[![](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia) + +The Cascadia package implements CSS selectors for use with the parse trees produced by the html package. + +To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package. diff --git a/vendor/github.com/andybalholm/cascadia/benchmark_test.go b/vendor/github.com/andybalholm/cascadia/benchmark_test.go new file mode 100644 index 0000000..42bf500 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/benchmark_test.go @@ -0,0 +1,53 @@ +package cascadia + +import ( + "strings" + "testing" + + "golang.org/x/net/html" +) + +func MustParseHTML(doc string) *html.Node { + dom, err := html.Parse(strings.NewReader(doc)) + if err != nil { + panic(err) + } + return dom +} + +var selector = MustCompile(`div.matched`) +var doc = `<!DOCTYPE html> +<html> +<body> +<div class="matched"> + <div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + <div class="matched"></div> + </div> +</div> +</body> +</html> +` +var dom = MustParseHTML(doc) + +func BenchmarkMatchAll(b *testing.B) { + var matches []*html.Node + for i := 0; i < b.N; i++ { + matches = selector.MatchAll(dom) + } + _ = matches +} diff --git 
a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test0 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test0 new file mode 100644 index 0000000..83a6561 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test0 @@ -0,0 +1 @@ +address
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test1 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test1 new file mode 100644 index 0000000..f59ec20 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test1 @@ -0,0 +1 @@ +*
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test10 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test10 new file mode 100644 index 0000000..0ee70eb --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test10 @@ -0,0 +1 @@ +p[title]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test11 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test11 new file mode 100644 index 0000000..66340c4 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test11 @@ -0,0 +1 @@ +address[title="foo"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test12 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test12 new file mode 100644 index 0000000..6303f08 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test12 @@ -0,0 +1 @@ +[ title ~= foo ]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test13 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test13 new file mode 100644 index 0000000..45d91eb --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test13 @@ -0,0 +1 @@ +[title~="hello world"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test14 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test14 new file mode 100644 index 0000000..62e7d68 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test14 @@ -0,0 +1 @@ +[lang|="en"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test15 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test15 new file mode 100644 index 0000000..fe9ab53 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test15 @@ -0,0 +1 @@ +[title^="foo"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test16 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test16 new file mode 100644 index 0000000..dbee733 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test16 @@ -0,0 +1 @@ +[title$="bar"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test17 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test17 new file mode 100644 index 0000000..e4fb403 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test17 @@ -0,0 +1 @@ +[title*="bar"]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test18 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test18 new file mode 100644 index 0000000..6075014 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test18 @@ -0,0 +1 @@ +.t1:not(.t2)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test19 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test19 new file mode 100644 index 0000000..f04dfaf --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test19 @@ -0,0 +1 @@ +div:not(.t1)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test2 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test2 new file mode 100644 index 0000000..5529b9b --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test2 @@ -0,0 +1 @@ +#foo
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test20 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test20 new file mode 100644 index 0000000..a1c8834 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test20 @@ -0,0 +1 @@ +li:nth-child(odd)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test21 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test21 new file mode 100644 index 0000000..b99fcb6 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test21 @@ -0,0 +1 @@ +li:nth-child(even)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test22 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test22 new file mode 100644 index 0000000..a9ee217 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test22 @@ -0,0 +1 @@ +li:nth-child(-n+2)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test23 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test23 new file mode 100644 index 0000000..aad519d --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test23 @@ -0,0 +1 @@ +li:nth-child(3n+1)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test24 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test24 new file mode 100644 index 0000000..436a219 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test24 @@ -0,0 +1 @@ +li:nth-last-child(odd)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test25 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test25 new file mode 100644 index 0000000..46f6cbc --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test25 @@ -0,0 +1 @@ +li:nth-last-child(even)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test26 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test26 new file mode 100644 index 0000000..d18bf3b --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test26 @@ -0,0 +1 @@ +li:nth-last-child(-n+2)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test27 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test27 new file mode 100644 index 0000000..d1d6eb3 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test27 @@ -0,0 +1 @@ +li:nth-last-child(3n+1)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test28 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test28 new file mode 100644 index 0000000..9a0c949 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test28 @@ -0,0 +1 @@ +span:first-child
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test29 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test29 new file mode 100644 index 0000000..7058608 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test29 @@ -0,0 +1 @@ +span:last-child
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test3 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test3 new file mode 100644 index 0000000..ee02482 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test3 @@ -0,0 +1 @@ +li#t1
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test30 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test30 new file mode 100644 index 0000000..536e6ff --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test30 @@ -0,0 +1 @@ +p:nth-of-type(2)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test31 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test31 new file mode 100644 index 0000000..61c1fc7 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test31 @@ -0,0 +1 @@ +p:nth-last-of-type(2)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test32 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test32 new file mode 100644 index 0000000..d9a9dc5 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test32 @@ -0,0 +1 @@ +p:last-of-type
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test33 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test33 new file mode 100644 index 0000000..9052c41 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test33 @@ -0,0 +1 @@ +p:first-of-type
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test34 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test34 new file mode 100644 index 0000000..60bd124 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test34 @@ -0,0 +1 @@ +p:only-child
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test35 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test35 new file mode 100644 index 0000000..87d9dbc --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test35 @@ -0,0 +1 @@ +p:only-of-type
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test36 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test36 new file mode 100644 index 0000000..8e92954 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test36 @@ -0,0 +1 @@ +:empty
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test37 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test37 new file mode 100644 index 0000000..ba3455f --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test37 @@ -0,0 +1 @@ +div p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test38 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test38 new file mode 100644 index 0000000..d6f24c0 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test38 @@ -0,0 +1 @@ +div table p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test39 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test39 new file mode 100644 index 0000000..a72a605 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test39 @@ -0,0 +1 @@ +div > p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test4 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test4 new file mode 100644 index 0000000..7b253d3 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test4 @@ -0,0 +1 @@ +*#t4
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test40 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test40 new file mode 100644 index 0000000..407ea3c --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test40 @@ -0,0 +1 @@ +p ~ p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test41 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test41 new file mode 100644 index 0000000..e36e094 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test41 @@ -0,0 +1 @@ +p + p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test42 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test42 new file mode 100644 index 0000000..fa59ada --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test42 @@ -0,0 +1 @@ +li, p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test43 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test43 new file mode 100644 index 0000000..e946ff3 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test43 @@ -0,0 +1 @@ +p +/*This is a comment*/ p
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test44 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test44 new file mode 100644 index 0000000..df68954 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test44 @@ -0,0 +1 @@ +p:contains("that wraps")
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test45 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test45 new file mode 100644 index 0000000..5c479a9 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test45 @@ -0,0 +1 @@ +p:containsOwn("that wraps")
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test46 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test46 new file mode 100644 index 0000000..a189d03 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test46 @@ -0,0 +1 @@ +:containsOwn("inner")
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test47 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test47 new file mode 100644 index 0000000..bfba368 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test47 @@ -0,0 +1 @@ +p:containsOwn("block")
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test48 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test48 new file mode 100644 index 0000000..4a6abb3 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test48 @@ -0,0 +1 @@ +div:has(#p1)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test49 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test49 new file mode 100644 index 0000000..2048a0c --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test49 @@ -0,0 +1 @@ +div:has(:containsOwn("2"))
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test5 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test5 new file mode 100644 index 0000000..702c0f1 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test5 @@ -0,0 +1 @@ +.t1
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test50 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test50 new file mode 100644 index 0000000..c062e4c --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test50 @@ -0,0 +1 @@ +body :has(:containsOwn("2"))
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test51 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test51 new file mode 100644 index 0000000..1512260 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test51 @@ -0,0 +1 @@ +body :haschild(:containsOwn("2"))
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test52 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test52 new file mode 100644 index 0000000..c3dee62 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test52 @@ -0,0 +1 @@ +p:matches([\d])
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test53 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test53 new file mode 100644 index 0000000..90f71d7 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test53 @@ -0,0 +1 @@ +p:matches([a-z])
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test54 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test54 new file mode 100644 index 0000000..88b4c28 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test54 @@ -0,0 +1 @@ +p:matches([a-zA-Z])
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test55 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test55 new file mode 100644 index 0000000..699b41f --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test55 @@ -0,0 +1 @@ +p:matches([^\d])
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test56 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test56 new file mode 100644 index 0000000..83d4c47 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test56 @@ -0,0 +1 @@ +p:matches(^(0|a))
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test57 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test57 new file mode 100644 index 0000000..e8507ba --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test57 @@ -0,0 +1 @@ +p:matches(^\d+$)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test58 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test58 new file mode 100644 index 0000000..e29dba9 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test58 @@ -0,0 +1 @@ +p:not(:matches(^\d+$))
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test59 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test59 new file mode 100644 index 0000000..b5f7206 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test59 @@ -0,0 +1 @@ +div :matchesOwn(^\d+$)
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test6 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test6 new file mode 100644 index 0000000..cf58afc --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test6 @@ -0,0 +1 @@ +p.t1
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test60 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test60 new file mode 100644 index 0000000..2154ba8 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test60 @@ -0,0 +1 @@ +[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test61 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test61 new file mode 100644 index 0000000..1c8f525 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test61 @@ -0,0 +1 @@ +[href#=(^https:\/\/[^\/]*\/?news)]
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test7 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test7 new file mode 100644 index 0000000..9ad6c20 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test7 @@ -0,0 +1 @@ +div.teST
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test8 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test8 new file mode 100644 index 0000000..2ed1284 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test8 @@ -0,0 +1 @@ +.t1.fail
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test9 b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test9 new file mode 100644 index 0000000..8c1c2a2 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/corpus/test9 @@ -0,0 +1 @@ +p.t1.t2
\ No newline at end of file diff --git a/vendor/github.com/andybalholm/cascadia/fuzz/fuzz.go b/vendor/github.com/andybalholm/cascadia/fuzz/fuzz.go new file mode 100644 index 0000000..d1a46f8 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/fuzz/fuzz.go @@ -0,0 +1,15 @@ +package fuzz + +import "github.com/andybalholm/cascadia" + +// Fuzz is the entrypoint used by the go-fuzz framework +func Fuzz(data []byte) int { + sel, err := cascadia.Compile(string(data)) + if err != nil { + if sel != nil { + panic("sel != nil on error") + } + return 0 + } + return 1 +} diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go new file mode 100644 index 0000000..495db9c --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/parser.go @@ -0,0 +1,835 @@ +// Package cascadia is an implementation of CSS selectors. +package cascadia + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" + + "golang.org/x/net/html" +) + +// a parser for CSS selectors +type parser struct { + s string // the source text + i int // the current position +} + +// parseEscape parses a backslash escape. +func (p *parser) parseEscape() (result string, err error) { + if len(p.s) < p.i+2 || p.s[p.i] != '\\' { + return "", errors.New("invalid escape sequence") + } + + start := p.i + 1 + c := p.s[start] + switch { + case c == '\r' || c == '\n' || c == '\f': + return "", errors.New("escaped line ending outside string") + case hexDigit(c): + // unicode escape (hex) + var i int + for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { + // empty + } + v, _ := strconv.ParseUint(p.s[start:i], 16, 21) + if len(p.s) > i { + switch p.s[i] { + case '\r': + i++ + if len(p.s) > i && p.s[i] == '\n' { + i++ + } + case ' ', '\t', '\n', '\f': + i++ + } + } + p.i = i + return string(rune(v)), nil + } + + // Return the literal character after the backslash. 
+ result = p.s[start : start+1] + p.i += 2 + return result, nil +} + +func hexDigit(c byte) bool { + return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' +} + +// nameStart returns whether c can be the first character of an identifier +// (not counting an initial hyphen, or an escape sequence). +func nameStart(c byte) bool { + return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 +} + +// nameChar returns whether c can be a character within an identifier +// (not counting an escape sequence). +func nameChar(c byte) bool { + return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 || + c == '-' || '0' <= c && c <= '9' +} + +// parseIdentifier parses an identifier. +func (p *parser) parseIdentifier() (result string, err error) { + startingDash := false + if len(p.s) > p.i && p.s[p.i] == '-' { + startingDash = true + p.i++ + } + + if len(p.s) <= p.i { + return "", errors.New("expected identifier, found EOF instead") + } + + if c := p.s[p.i]; !(nameStart(c) || c == '\\') { + return "", fmt.Errorf("expected identifier, found %c instead", c) + } + + result, err = p.parseName() + if startingDash && err == nil { + result = "-" + result + } + return +} + +// parseName parses a name (which is like an identifier, but doesn't have +// extra restrictions on the first character). +func (p *parser) parseName() (result string, err error) { + i := p.i +loop: + for i < len(p.s) { + c := p.s[i] + switch { + case nameChar(c): + start := i + for i < len(p.s) && nameChar(p.s[i]) { + i++ + } + result += p.s[start:i] + case c == '\\': + p.i = i + val, err := p.parseEscape() + if err != nil { + return "", err + } + i = p.i + result += val + default: + break loop + } + } + + if result == "" { + return "", errors.New("expected name, found EOF instead") + } + + p.i = i + return result, nil +} + +// parseString parses a single- or double-quoted string. 
+func (p *parser) parseString() (result string, err error) { + i := p.i + if len(p.s) < i+2 { + return "", errors.New("expected string, found EOF instead") + } + + quote := p.s[i] + i++ + +loop: + for i < len(p.s) { + switch p.s[i] { + case '\\': + if len(p.s) > i+1 { + switch c := p.s[i+1]; c { + case '\r': + if len(p.s) > i+2 && p.s[i+2] == '\n' { + i += 3 + continue loop + } + fallthrough + case '\n', '\f': + i += 2 + continue loop + } + } + p.i = i + val, err := p.parseEscape() + if err != nil { + return "", err + } + i = p.i + result += val + case quote: + break loop + case '\r', '\n', '\f': + return "", errors.New("unexpected end of line in string") + default: + start := i + for i < len(p.s) { + if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' { + break + } + i++ + } + result += p.s[start:i] + } + } + + if i >= len(p.s) { + return "", errors.New("EOF in string") + } + + // Consume the final quote. + i++ + + p.i = i + return result, nil +} + +// parseRegex parses a regular expression; the end is defined by encountering an +// unmatched closing ')' or ']' which is not consumed +func (p *parser) parseRegex() (rx *regexp.Regexp, err error) { + i := p.i + if len(p.s) < i+2 { + return nil, errors.New("expected regular expression, found EOF instead") + } + + // number of open parens or brackets; + // when it becomes negative, finished parsing regex + open := 0 + +loop: + for i < len(p.s) { + switch p.s[i] { + case '(', '[': + open++ + case ')', ']': + open-- + if open < 0 { + break loop + } + } + i++ + } + + if i >= len(p.s) { + return nil, errors.New("EOF in regular expression") + } + rx, err = regexp.Compile(p.s[p.i:i]) + p.i = i + return rx, err +} + +// skipWhitespace consumes whitespace characters and comments. +// It returns true if there was actually anything to skip. 
+func (p *parser) skipWhitespace() bool { + i := p.i + for i < len(p.s) { + switch p.s[i] { + case ' ', '\t', '\r', '\n', '\f': + i++ + continue + case '/': + if strings.HasPrefix(p.s[i:], "/*") { + end := strings.Index(p.s[i+len("/*"):], "*/") + if end != -1 { + i += end + len("/**/") + continue + } + } + } + break + } + + if i > p.i { + p.i = i + return true + } + + return false +} + +// consumeParenthesis consumes an opening parenthesis and any following +// whitespace. It returns true if there was actually a parenthesis to skip. +func (p *parser) consumeParenthesis() bool { + if p.i < len(p.s) && p.s[p.i] == '(' { + p.i++ + p.skipWhitespace() + return true + } + return false +} + +// consumeClosingParenthesis consumes a closing parenthesis and any preceding +// whitespace. It returns true if there was actually a parenthesis to skip. +func (p *parser) consumeClosingParenthesis() bool { + i := p.i + p.skipWhitespace() + if p.i < len(p.s) && p.s[p.i] == ')' { + p.i++ + return true + } + p.i = i + return false +} + +// parseTypeSelector parses a type selector (one that matches by tag name). +func (p *parser) parseTypeSelector() (result Selector, err error) { + tag, err := p.parseIdentifier() + if err != nil { + return nil, err + } + + return typeSelector(tag), nil +} + +// parseIDSelector parses a selector that matches by id attribute. +func (p *parser) parseIDSelector() (Selector, error) { + if p.i >= len(p.s) { + return nil, fmt.Errorf("expected id selector (#id), found EOF instead") + } + if p.s[p.i] != '#' { + return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) + } + + p.i++ + id, err := p.parseName() + if err != nil { + return nil, err + } + + return attributeEqualsSelector("id", id), nil +} + +// parseClassSelector parses a selector that matches by class attribute. 
+func (p *parser) parseClassSelector() (Selector, error) { + if p.i >= len(p.s) { + return nil, fmt.Errorf("expected class selector (.class), found EOF instead") + } + if p.s[p.i] != '.' { + return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) + } + + p.i++ + class, err := p.parseIdentifier() + if err != nil { + return nil, err + } + + return attributeIncludesSelector("class", class), nil +} + +// parseAttributeSelector parses a selector that matches by attribute value. +func (p *parser) parseAttributeSelector() (Selector, error) { + if p.i >= len(p.s) { + return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") + } + if p.s[p.i] != '[' { + return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) + } + + p.i++ + p.skipWhitespace() + key, err := p.parseIdentifier() + if err != nil { + return nil, err + } + + p.skipWhitespace() + if p.i >= len(p.s) { + return nil, errors.New("unexpected EOF in attribute selector") + } + + if p.s[p.i] == ']' { + p.i++ + return attributeExistsSelector(key), nil + } + + if p.i+2 >= len(p.s) { + return nil, errors.New("unexpected EOF in attribute selector") + } + + op := p.s[p.i : p.i+2] + if op[0] == '=' { + op = "=" + } else if op[1] != '=' { + return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op) + } + p.i += len(op) + + p.skipWhitespace() + if p.i >= len(p.s) { + return nil, errors.New("unexpected EOF in attribute selector") + } + var val string + var rx *regexp.Regexp + if op == "#=" { + rx, err = p.parseRegex() + } else { + switch p.s[p.i] { + case '\'', '"': + val, err = p.parseString() + default: + val, err = p.parseIdentifier() + } + } + if err != nil { + return nil, err + } + + p.skipWhitespace() + if p.i >= len(p.s) { + return nil, errors.New("unexpected EOF in attribute selector") + } + if p.s[p.i] != ']' { + return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) + } + p.i++ + + 
switch op { + case "=": + return attributeEqualsSelector(key, val), nil + case "!=": + return attributeNotEqualSelector(key, val), nil + case "~=": + return attributeIncludesSelector(key, val), nil + case "|=": + return attributeDashmatchSelector(key, val), nil + case "^=": + return attributePrefixSelector(key, val), nil + case "$=": + return attributeSuffixSelector(key, val), nil + case "*=": + return attributeSubstringSelector(key, val), nil + case "#=": + return attributeRegexSelector(key, rx), nil + } + + return nil, fmt.Errorf("attribute operator %q is not supported", op) +} + +var errExpectedParenthesis = errors.New("expected '(' but didn't find it") +var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") +var errUnmatchedParenthesis = errors.New("unmatched '('") + +// parsePseudoclassSelector parses a pseudoclass selector like :not(p). +func (p *parser) parsePseudoclassSelector() (Selector, error) { + if p.i >= len(p.s) { + return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") + } + if p.s[p.i] != ':' { + return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) + } + + p.i++ + name, err := p.parseIdentifier() + if err != nil { + return nil, err + } + name = toLowerASCII(name) + + switch name { + case "not", "has", "haschild": + if !p.consumeParenthesis() { + return nil, errExpectedParenthesis + } + sel, parseErr := p.parseSelectorGroup() + if parseErr != nil { + return nil, parseErr + } + if !p.consumeClosingParenthesis() { + return nil, errExpectedClosingParenthesis + } + + switch name { + case "not": + return negatedSelector(sel), nil + case "has": + return hasDescendantSelector(sel), nil + case "haschild": + return hasChildSelector(sel), nil + } + + case "contains", "containsown": + if !p.consumeParenthesis() { + return nil, errExpectedParenthesis + } + if p.i == len(p.s) { + return nil, errUnmatchedParenthesis + } + var val string + switch p.s[p.i] { 
+ case '\'', '"': + val, err = p.parseString() + default: + val, err = p.parseIdentifier() + } + if err != nil { + return nil, err + } + val = strings.ToLower(val) + p.skipWhitespace() + if p.i >= len(p.s) { + return nil, errors.New("unexpected EOF in pseudo selector") + } + if !p.consumeClosingParenthesis() { + return nil, errExpectedClosingParenthesis + } + + switch name { + case "contains": + return textSubstrSelector(val), nil + case "containsown": + return ownTextSubstrSelector(val), nil + } + + case "matches", "matchesown": + if !p.consumeParenthesis() { + return nil, errExpectedParenthesis + } + rx, err := p.parseRegex() + if err != nil { + return nil, err + } + if p.i >= len(p.s) { + return nil, errors.New("unexpected EOF in pseudo selector") + } + if !p.consumeClosingParenthesis() { + return nil, errExpectedClosingParenthesis + } + + switch name { + case "matches": + return textRegexSelector(rx), nil + case "matchesown": + return ownTextRegexSelector(rx), nil + } + + case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": + if !p.consumeParenthesis() { + return nil, errExpectedParenthesis + } + a, b, err := p.parseNth() + if err != nil { + return nil, err + } + if !p.consumeClosingParenthesis() { + return nil, errExpectedClosingParenthesis + } + if a == 0 { + switch name { + case "nth-child": + return simpleNthChildSelector(b, false), nil + case "nth-of-type": + return simpleNthChildSelector(b, true), nil + case "nth-last-child": + return simpleNthLastChildSelector(b, false), nil + case "nth-last-of-type": + return simpleNthLastChildSelector(b, true), nil + } + } + return nthChildSelector(a, b, + name == "nth-last-child" || name == "nth-last-of-type", + name == "nth-of-type" || name == "nth-last-of-type"), + nil + + case "first-child": + return simpleNthChildSelector(1, false), nil + case "last-child": + return simpleNthLastChildSelector(1, false), nil + case "first-of-type": + return simpleNthChildSelector(1, true), nil + case 
"last-of-type": + return simpleNthLastChildSelector(1, true), nil + case "only-child": + return onlyChildSelector(false), nil + case "only-of-type": + return onlyChildSelector(true), nil + case "input": + return inputSelector, nil + case "empty": + return emptyElementSelector, nil + case "root": + return rootSelector, nil + } + + return nil, fmt.Errorf("unknown pseudoclass :%s", name) +} + +// parseInteger parses a decimal integer. +func (p *parser) parseInteger() (int, error) { + i := p.i + start := i + for i < len(p.s) && '0' <= p.s[i] && p.s[i] <= '9' { + i++ + } + if i == start { + return 0, errors.New("expected integer, but didn't find it") + } + p.i = i + + val, err := strconv.Atoi(p.s[start:i]) + if err != nil { + return 0, err + } + + return val, nil +} + +// parseNth parses the argument for :nth-child (normally of the form an+b). +func (p *parser) parseNth() (a, b int, err error) { + // initial state + if p.i >= len(p.s) { + goto eof + } + switch p.s[p.i] { + case '-': + p.i++ + goto negativeA + case '+': + p.i++ + goto positiveA + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + goto positiveA + case 'n', 'N': + a = 1 + p.i++ + goto readN + case 'o', 'O', 'e', 'E': + id, nameErr := p.parseName() + if nameErr != nil { + return 0, 0, nameErr + } + id = toLowerASCII(id) + if id == "odd" { + return 2, 1, nil + } + if id == "even" { + return 2, 0, nil + } + return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id) + default: + goto invalid + } + +positiveA: + if p.i >= len(p.s) { + goto eof + } + switch p.s[p.i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + a, err = p.parseInteger() + if err != nil { + return 0, 0, err + } + goto readA + case 'n', 'N': + a = 1 + p.i++ + goto readN + default: + goto invalid + } + +negativeA: + if p.i >= len(p.s) { + goto eof + } + switch p.s[p.i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + a, err = p.parseInteger() + if err != nil { + return 0, 0, err + } + a = -a + 
goto readA + case 'n', 'N': + a = -1 + p.i++ + goto readN + default: + goto invalid + } + +readA: + if p.i >= len(p.s) { + goto eof + } + switch p.s[p.i] { + case 'n', 'N': + p.i++ + goto readN + default: + // The number we read as a is actually b. + return 0, a, nil + } + +readN: + p.skipWhitespace() + if p.i >= len(p.s) { + goto eof + } + switch p.s[p.i] { + case '+': + p.i++ + p.skipWhitespace() + b, err = p.parseInteger() + if err != nil { + return 0, 0, err + } + return a, b, nil + case '-': + p.i++ + p.skipWhitespace() + b, err = p.parseInteger() + if err != nil { + return 0, 0, err + } + return a, -b, nil + default: + return a, 0, nil + } + +eof: + return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b") + +invalid: + return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b") +} + +// parseSimpleSelectorSequence parses a selector sequence that applies to +// a single element. +func (p *parser) parseSimpleSelectorSequence() (Selector, error) { + var result Selector + + if p.i >= len(p.s) { + return nil, errors.New("expected selector, found EOF instead") + } + + switch p.s[p.i] { + case '*': + // It's the universal selector. Just skip over it, since it doesn't affect the meaning. + p.i++ + case '#', '.', '[', ':': + // There's no type selector. Wait to process the other till the main loop. 
+ default: + r, err := p.parseTypeSelector() + if err != nil { + return nil, err + } + result = r + } + +loop: + for p.i < len(p.s) { + var ns Selector + var err error + switch p.s[p.i] { + case '#': + ns, err = p.parseIDSelector() + case '.': + ns, err = p.parseClassSelector() + case '[': + ns, err = p.parseAttributeSelector() + case ':': + ns, err = p.parsePseudoclassSelector() + default: + break loop + } + if err != nil { + return nil, err + } + if result == nil { + result = ns + } else { + result = intersectionSelector(result, ns) + } + } + + if result == nil { + result = func(n *html.Node) bool { + return n.Type == html.ElementNode + } + } + + return result, nil +} + +// parseSelector parses a selector that may include combinators. +func (p *parser) parseSelector() (result Selector, err error) { + p.skipWhitespace() + result, err = p.parseSimpleSelectorSequence() + if err != nil { + return + } + + for { + var combinator byte + if p.skipWhitespace() { + combinator = ' ' + } + if p.i >= len(p.s) { + return + } + + switch p.s[p.i] { + case '+', '>', '~': + combinator = p.s[p.i] + p.i++ + p.skipWhitespace() + case ',', ')': + // These characters can't begin a selector, but they can legally occur after one. + return + } + + if combinator == 0 { + return + } + + c, err := p.parseSimpleSelectorSequence() + if err != nil { + return nil, err + } + + switch combinator { + case ' ': + result = descendantSelector(result, c) + case '>': + result = childSelector(result, c) + case '+': + result = siblingSelector(result, c, true) + case '~': + result = siblingSelector(result, c, false) + } + } + + panic("unreachable") +} + +// parseSelectorGroup parses a group of selectors, separated by commas. 
+func (p *parser) parseSelectorGroup() (result Selector, err error) { + result, err = p.parseSelector() + if err != nil { + return + } + + for p.i < len(p.s) { + if p.s[p.i] != ',' { + return result, nil + } + p.i++ + c, err := p.parseSelector() + if err != nil { + return nil, err + } + result = unionSelector(result, c) + } + + return +} diff --git a/vendor/github.com/andybalholm/cascadia/parser_test.go b/vendor/github.com/andybalholm/cascadia/parser_test.go new file mode 100644 index 0000000..47dd4a6 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/parser_test.go @@ -0,0 +1,86 @@ +package cascadia + +import ( + "testing" +) + +var identifierTests = map[string]string{ + "x": "x", + "96": "", + "-x": "-x", + `r\e9 sumé`: "résumé", + `a\"b`: `a"b`, +} + +func TestParseIdentifier(t *testing.T) { + for source, want := range identifierTests { + p := &parser{s: source} + got, err := p.parseIdentifier() + + if err != nil { + if want == "" { + // It was supposed to be an error. + continue + } + t.Errorf("parsing %q: got error (%s), want %q", source, err, want) + continue + } + + if want == "" { + if err == nil { + t.Errorf("parsing %q: got %q, want error", source, got) + } + continue + } + + if p.i < len(source) { + t.Errorf("parsing %q: %d bytes left over", source, len(source)-p.i) + continue + } + + if got != want { + t.Errorf("parsing %q: got %q, want %q", source, got, want) + } + } +} + +var stringTests = map[string]string{ + `"x"`: "x", + `'x'`: "x", + `'x`: "", + "'x\\\r\nx'": "xx", + `"r\e9 sumé"`: "résumé", + `"a\"b"`: `a"b`, +} + +func TestParseString(t *testing.T) { + for source, want := range stringTests { + p := &parser{s: source} + got, err := p.parseString() + + if err != nil { + if want == "" { + // It was supposed to be an error. 
+ continue + } + t.Errorf("parsing %q: got error (%s), want %q", source, err, want) + continue + } + + if want == "" { + if err == nil { + t.Errorf("parsing %q: got %q, want error", source, got) + } + continue + } + + if p.i < len(source) { + t.Errorf("parsing %q: %d bytes left over", source, len(source)-p.i) + continue + } + + if got != want { + t.Errorf("parsing %q: got %q, want %q", source, got, want) + } + } +} diff --git a/vendor/github.com/andybalholm/cascadia/selector.go b/vendor/github.com/andybalholm/cascadia/selector.go new file mode 100644 index 0000000..9fb05cc --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/selector.go @@ -0,0 +1,622 @@ +package cascadia + +import ( + "bytes" + "fmt" + "regexp" + "strings" + + "golang.org/x/net/html" +) + +// the Selector type, and functions for creating them + +// A Selector is a function which tells whether a node matches or not. +type Selector func(*html.Node) bool + +// hasChildMatch returns whether n has any child that matches a. +func hasChildMatch(n *html.Node, a Selector) bool { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if a(c) { + return true + } + } + return false +} + +// hasDescendantMatch performs a depth-first search of n's descendants, +// testing whether any of them match a. It returns true as soon as a match is +// found, or false if no match is found. +func hasDescendantMatch(n *html.Node, a Selector) bool { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { + return true + } + } + return false +} + +// Compile parses a selector and returns, if successful, a Selector object +// that can be used to match against html.Node objects. 
+func Compile(sel string) (Selector, error) { + p := &parser{s: sel} + compiled, err := p.parseSelectorGroup() + if err != nil { + return nil, err + } + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil +} + +// MustCompile is like Compile, but panics instead of returning an error. +func MustCompile(sel string) Selector { + compiled, err := Compile(sel) + if err != nil { + panic(err) + } + return compiled +} + +// MatchAll returns a slice of the nodes that match the selector, +// from n and its children. +func (s Selector) MatchAll(n *html.Node) []*html.Node { + return s.matchAllInto(n, nil) +} + +func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node { + if s(n) { + storage = append(storage, n) + } + + for child := n.FirstChild; child != nil; child = child.NextSibling { + storage = s.matchAllInto(child, storage) + } + + return storage +} + +// Match returns true if the node matches the selector. +func (s Selector) Match(n *html.Node) bool { + return s(n) +} + +// MatchFirst returns the first node that matches s, from n and its children. +func (s Selector) MatchFirst(n *html.Node) *html.Node { + if s.Match(n) { + return n + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + m := s.MatchFirst(c) + if m != nil { + return m + } + } + return nil +} + +// Filter returns the nodes in nodes that match the selector. +func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { + for _, n := range nodes { + if s(n) { + result = append(result, n) + } + } + return result +} + +// typeSelector returns a Selector that matches elements with a given tag name. +func typeSelector(tag string) Selector { + tag = toLowerASCII(tag) + return func(n *html.Node) bool { + return n.Type == html.ElementNode && n.Data == tag + } +} + +// toLowerASCII returns s with all ASCII capital letters lowercased. 
+func toLowerASCII(s string) string { + var b []byte + for i := 0; i < len(s); i++ { + if c := s[i]; 'A' <= c && c <= 'Z' { + if b == nil { + b = make([]byte, len(s)) + copy(b, s) + } + b[i] = s[i] + ('a' - 'A') + } + } + + if b == nil { + return s + } + + return string(b) +} + +// attributeSelector returns a Selector that matches elements +// where the attribute named key satisifes the function f. +func attributeSelector(key string, f func(string) bool) Selector { + key = toLowerASCII(key) + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + for _, a := range n.Attr { + if a.Key == key && f(a.Val) { + return true + } + } + return false + } +} + +// attributeExistsSelector returns a Selector that matches elements that have +// an attribute named key. +func attributeExistsSelector(key string) Selector { + return attributeSelector(key, func(string) bool { return true }) +} + +// attributeEqualsSelector returns a Selector that matches elements where +// the attribute named key has the value val. +func attributeEqualsSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + return s == val + }) +} + +// attributeNotEqualSelector returns a Selector that matches elements where +// the attribute named key does not have the value val. +func attributeNotEqualSelector(key, val string) Selector { + key = toLowerASCII(key) + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + for _, a := range n.Attr { + if a.Key == key && a.Val == val { + return false + } + } + return true + } +} + +// attributeIncludesSelector returns a Selector that matches elements where +// the attribute named key is a whitespace-separated list that includes val. 
+func attributeIncludesSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + for s != "" { + i := strings.IndexAny(s, " \t\r\n\f") + if i == -1 { + return s == val + } + if s[:i] == val { + return true + } + s = s[i+1:] + } + return false + }) +} + +// attributeDashmatchSelector returns a Selector that matches elements where +// the attribute named key equals val or starts with val plus a hyphen. +func attributeDashmatchSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + if s == val { + return true + } + if len(s) <= len(val) { + return false + } + if s[:len(val)] == val && s[len(val)] == '-' { + return true + } + return false + }) +} + +// attributePrefixSelector returns a Selector that matches elements where +// the attribute named key starts with val. +func attributePrefixSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + if strings.TrimSpace(s) == "" { + return false + } + return strings.HasPrefix(s, val) + }) +} + +// attributeSuffixSelector returns a Selector that matches elements where +// the attribute named key ends with val. +func attributeSuffixSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + if strings.TrimSpace(s) == "" { + return false + } + return strings.HasSuffix(s, val) + }) +} + +// attributeSubstringSelector returns a Selector that matches nodes where +// the attribute named key contains val. 
+func attributeSubstringSelector(key, val string) Selector { + return attributeSelector(key, + func(s string) bool { + if strings.TrimSpace(s) == "" { + return false + } + return strings.Contains(s, val) + }) +} + +// attributeRegexSelector returns a Selector that matches nodes where +// the attribute named key matches the regular expression rx +func attributeRegexSelector(key string, rx *regexp.Regexp) Selector { + return attributeSelector(key, + func(s string) bool { + return rx.MatchString(s) + }) +} + +// intersectionSelector returns a selector that matches nodes that match +// both a and b. +func intersectionSelector(a, b Selector) Selector { + return func(n *html.Node) bool { + return a(n) && b(n) + } +} + +// unionSelector returns a selector that matches elements that match +// either a or b. +func unionSelector(a, b Selector) Selector { + return func(n *html.Node) bool { + return a(n) || b(n) + } +} + +// negatedSelector returns a selector that matches elements that do not match a. +func negatedSelector(a Selector) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + return !a(n) + } +} + +// writeNodeText writes the text contained in n and its descendants to b. +func writeNodeText(n *html.Node, b *bytes.Buffer) { + switch n.Type { + case html.TextNode: + b.WriteString(n.Data) + case html.ElementNode: + for c := n.FirstChild; c != nil; c = c.NextSibling { + writeNodeText(c, b) + } + } +} + +// nodeText returns the text contained in n and its descendants. +func nodeText(n *html.Node) string { + var b bytes.Buffer + writeNodeText(n, &b) + return b.String() +} + +// nodeOwnText returns the contents of the text nodes that are direct +// children of n. 
+func nodeOwnText(n *html.Node) string { + var b bytes.Buffer + for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.TextNode { + b.WriteString(c.Data) + } + } + return b.String() +} + +// textSubstrSelector returns a selector that matches nodes that +// contain the given text. +func textSubstrSelector(val string) Selector { + return func(n *html.Node) bool { + text := strings.ToLower(nodeText(n)) + return strings.Contains(text, val) + } +} + +// ownTextSubstrSelector returns a selector that matches nodes that +// directly contain the given text +func ownTextSubstrSelector(val string) Selector { + return func(n *html.Node) bool { + text := strings.ToLower(nodeOwnText(n)) + return strings.Contains(text, val) + } +} + +// textRegexSelector returns a selector that matches nodes whose text matches +// the specified regular expression +func textRegexSelector(rx *regexp.Regexp) Selector { + return func(n *html.Node) bool { + return rx.MatchString(nodeText(n)) + } +} + +// ownTextRegexSelector returns a selector that matches nodes whose text +// directly matches the specified regular expression +func ownTextRegexSelector(rx *regexp.Regexp) Selector { + return func(n *html.Node) bool { + return rx.MatchString(nodeOwnText(n)) + } +} + +// hasChildSelector returns a selector that matches elements +// with a child that matches a. +func hasChildSelector(a Selector) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + return hasChildMatch(n, a) + } +} + +// hasDescendantSelector returns a selector that matches elements +// with any descendant that matches a. +func hasDescendantSelector(a Selector) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + return hasDescendantMatch(n, a) + } +} + +// nthChildSelector returns a selector that implements :nth-child(an+b). +// If last is true, implements :nth-last-child instead. 
+// If ofType is true, implements :nth-of-type instead. +func nthChildSelector(a, b int, last, ofType bool) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + + parent := n.Parent + if parent == nil { + return false + } + + if parent.Type == html.DocumentNode { + return false + } + + i := -1 + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { + continue + } + count++ + if c == n { + i = count + if !last { + break + } + } + } + + if i == -1 { + // This shouldn't happen, since n should always be one of its parent's children. + return false + } + + if last { + i = count - i + 1 + } + + i -= b + if a == 0 { + return i == 0 + } + + return i%a == 0 && i/a >= 0 + } +} + +// simpleNthChildSelector returns a selector that implements :nth-child(b). +// If ofType is true, implements :nth-of-type instead. +func simpleNthChildSelector(b int, ofType bool) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + + parent := n.Parent + if parent == nil { + return false + } + + if parent.Type == html.DocumentNode { + return false + } + + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { + continue + } + count++ + if c == n { + return count == b + } + if count >= b { + return false + } + } + return false + } +} + +// simpleNthLastChildSelector returns a selector that implements +// :nth-last-child(b). If ofType is true, implements :nth-last-of-type +// instead. 
+func simpleNthLastChildSelector(b int, ofType bool) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + + parent := n.Parent + if parent == nil { + return false + } + + if parent.Type == html.DocumentNode { + return false + } + + count := 0 + for c := parent.LastChild; c != nil; c = c.PrevSibling { + if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { + continue + } + count++ + if c == n { + return count == b + } + if count >= b { + return false + } + } + return false + } +} + +// onlyChildSelector returns a selector that implements :only-child. +// If ofType is true, it implements :only-of-type instead. +func onlyChildSelector(ofType bool) Selector { + return func(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + + parent := n.Parent + if parent == nil { + return false + } + + if parent.Type == html.DocumentNode { + return false + } + + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { + continue + } + count++ + if count > 1 { + return false + } + } + + return count == 1 + } +} + +// inputSelector is a Selector that matches input, select, textarea and button elements. +func inputSelector(n *html.Node) bool { + return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") +} + +// emptyElementSelector is a Selector that matches empty elements. +func emptyElementSelector(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + switch c.Type { + case html.ElementNode, html.TextNode: + return false + } + } + + return true +} + +// descendantSelector returns a Selector that matches an element if +// it matches d and has an ancestor that matches a. 
+func descendantSelector(a, d Selector) Selector { + return func(n *html.Node) bool { + if !d(n) { + return false + } + + for p := n.Parent; p != nil; p = p.Parent { + if a(p) { + return true + } + } + + return false + } +} + +// childSelector returns a Selector that matches an element if +// it matches d and its parent matches a. +func childSelector(a, d Selector) Selector { + return func(n *html.Node) bool { + return d(n) && n.Parent != nil && a(n.Parent) + } +} + +// siblingSelector returns a Selector that matches an element +// if it matches s2 and in is preceded by an element that matches s1. +// If adjacent is true, the sibling must be immediately before the element. +func siblingSelector(s1, s2 Selector, adjacent bool) Selector { + return func(n *html.Node) bool { + if !s2(n) { + return false + } + + if adjacent { + for n = n.PrevSibling; n != nil; n = n.PrevSibling { + if n.Type == html.TextNode || n.Type == html.CommentNode { + continue + } + return s1(n) + } + return false + } + + // Walk backwards looking for element that matches s1 + for c := n.PrevSibling; c != nil; c = c.PrevSibling { + if s1(c) { + return true + } + } + + return false + } +} + +// rootSelector implements :root +func rootSelector(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + if n.Parent == nil { + return false + } + return n.Parent.Type == html.DocumentNode +} diff --git a/vendor/github.com/andybalholm/cascadia/selector_test.go b/vendor/github.com/andybalholm/cascadia/selector_test.go new file mode 100644 index 0000000..7ff77e6 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/selector_test.go @@ -0,0 +1,654 @@ +package cascadia + +import ( + "bytes" + "strings" + "testing" + + "golang.org/x/net/html" +) + +type selectorTest struct { + HTML, selector string + results []string +} + +func nodeString(n *html.Node) string { + buf := bytes.NewBufferString("") + html.Render(buf, n) + return buf.String() +} + +var selectorTests = []selectorTest{ + { + 
`<body><address>This address...</address></body>`, + "address", + []string{ + "<address>This address...</address>", + }, + }, + { + `<!-- comment --><html><head></head><body>text</body></html>`, + "*", + []string{ + "<html><head></head><body>text</body></html>", + "<head></head>", + "<body>text</body>", + }, + }, + { + `<html><head></head><body></body></html>`, + "*", + []string{ + "<html><head></head><body></body></html>", + "<head></head>", + "<body></body>", + }, + }, + { + `<p id="foo"><p id="bar">`, + "#foo", + []string{ + `<p id="foo"></p>`, + }, + }, + { + `<ul><li id="t1"><p id="t1">`, + "li#t1", + []string{ + `<li id="t1"><p id="t1"></p></li>`, + }, + }, + { + `<ol><li id="t4"><li id="t44">`, + "*#t4", + []string{ + `<li id="t4"></li>`, + }, + }, + { + `<ul><li class="t1"><li class="t2">`, + ".t1", + []string{ + `<li class="t1"></li>`, + }, + }, + { + `<p class="t1 t2">`, + "p.t1", + []string{ + `<p class="t1 t2"></p>`, + }, + }, + { + `<div class="test">`, + "div.teST", + []string{}, + }, + { + `<p class="t1 t2">`, + ".t1.fail", + []string{}, + }, + { + `<p class="t1 t2">`, + "p.t1.t2", + []string{ + `<p class="t1 t2"></p>`, + }, + }, + { + `<p><p title="title">`, + "p[title]", + []string{ + `<p title="title"></p>`, + }, + }, + { + `<address><address title="foo"><address title="bar">`, + `address[title="foo"]`, + []string{ + `<address title="foo"><address title="bar"></address></address>`, + }, + }, + { + `<address><address title="foo"><address title="bar">`, + `address[title!="foo"]`, + []string{ + `<address><address title="foo"><address title="bar"></address></address></address>`, + `<address title="bar"></address>`, + }, + }, + { + `<p title="tot foo bar">`, + `[ title ~= foo ]`, + []string{ + `<p title="tot foo bar"></p>`, + }, + }, + { + `<p title="hello world">`, + `[title~="hello world"]`, + []string{}, + }, + { + `<p lang="en"><p lang="en-gb"><p lang="enough"><p lang="fr-en">`, + `[lang|="en"]`, + []string{ + `<p lang="en"></p>`, + `<p 
lang="en-gb"></p>`, + }, + }, + { + `<p title="foobar"><p title="barfoo">`, + `[title^="foo"]`, + []string{ + `<p title="foobar"></p>`, + }, + }, + { + `<p title="foobar"><p title="barfoo">`, + `[title$="bar"]`, + []string{ + `<p title="foobar"></p>`, + }, + }, + { + `<p title="foobarufoo">`, + `[title*="bar"]`, + []string{ + `<p title="foobarufoo"></p>`, + }, + }, + { + `<p class=" ">This text should be green.</p><p>This text should be green.</p>`, + `p[class$=" "]`, + []string{}, + }, + { + `<p class="">This text should be green.</p><p>This text should be green.</p>`, + `p[class$=""]`, + []string{}, + }, + { + `<p class=" ">This text should be green.</p><p>This text should be green.</p>`, + `p[class^=" "]`, + []string{}, + }, + { + `<p class="">This text should be green.</p><p>This text should be green.</p>`, + `p[class^=""]`, + []string{}, + }, + { + `<p class=" ">This text should be green.</p><p>This text should be green.</p>`, + `p[class*=" "]`, + []string{}, + }, + { + `<p class="">This text should be green.</p><p>This text should be green.</p>`, + `p[class*=""]`, + []string{}, + }, + { + `<input type="radio" name="Sex" value="F"/>`, + `input[name=Sex][value=F]`, + []string{ + `<input type="radio" name="Sex" value="F"/>`, + }, + }, + { + `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tr style="height:64px">aaa</tr></table>`, + `table[border="0"][cellpadding="0"][cellspacing="0"]`, + []string{ + `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tbody><tr style="height:64px"></tr></tbody></table>`, + }, + }, + { + `<p class="t1 t2">`, + ".t1:not(.t2)", + []string{}, + }, + { + `<div class="t3">`, + `div:not(.t1)`, + []string{ + `<div class="t3"></div>`, + }, + }, + { + `<div><div class="t2"><div class="t3">`, + `div:not([class="t2"])`, + []string{ + `<div><div class="t2"><div 
class="t3"></div></div></div>`, + `<div class="t3"></div>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3></ol>`, + `li:nth-child(odd)`, + []string{ + `<li id="1"></li>`, + `<li id="3"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3></ol>`, + `li:nth-child(even)`, + []string{ + `<li id="2"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3></ol>`, + `li:nth-child(-n+2)`, + []string{ + `<li id="1"></li>`, + `<li id="2"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3></ol>`, + `li:nth-child(3n+1)`, + []string{ + `<li id="1"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3><li id=4></ol>`, + `li:nth-last-child(odd)`, + []string{ + `<li id="2"></li>`, + `<li id="4"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3><li id=4></ol>`, + `li:nth-last-child(even)`, + []string{ + `<li id="1"></li>`, + `<li id="3"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3><li id=4></ol>`, + `li:nth-last-child(-n+2)`, + []string{ + `<li id="3"></li>`, + `<li id="4"></li>`, + }, + }, + { + `<ol><li id=1><li id=2><li id=3><li id=4></ol>`, + `li:nth-last-child(3n+1)`, + []string{ + `<li id="1"></li>`, + `<li id="4"></li>`, + }, + }, + { + `<p>some text <span id="1">and a span</span><span id="2"> and another</span></p>`, + `span:first-child`, + []string{ + `<span id="1">and a span</span>`, + }, + }, + { + `<span>a span</span> and some text`, + `span:last-child`, + []string{ + `<span>a span</span>`, + }, + }, + { + `<address></address><p id=1><p id=2>`, + `p:nth-of-type(2)`, + []string{ + `<p id="2"></p>`, + }, + }, + { + `<address></address><p id=1><p id=2></p><a>`, + `p:nth-last-of-type(2)`, + []string{ + `<p id="1"></p>`, + }, + }, + { + `<address></address><p id=1><p id=2></p><a>`, + `p:last-of-type`, + []string{ + `<p id="2"></p>`, + }, + }, + { + `<address></address><p id=1><p id=2></p><a>`, + `p:first-of-type`, + []string{ + `<p id="1"></p>`, + }, + }, + { + `<div><p id="1"></p><a></a></div><div><p id="2"></p></div>`, + `p:only-child`, + 
[]string{ + `<p id="2"></p>`, + }, + }, + { + `<div><p id="1"></p><a></a></div><div><p id="2"></p><p id="3"></p></div>`, + `p:only-of-type`, + []string{ + `<p id="1"></p>`, + }, + }, + { + `<p id="1"><!-- --><p id="2">Hello<p id="3"><span>`, + `:empty`, + []string{ + `<head></head>`, + `<p id="1"><!-- --></p>`, + `<span></span>`, + }, + }, + { + `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`, + `div p`, + []string{ + `<p id="1"><table><tbody><tr><td><p id="2"></p></td></tr></tbody></table></p>`, + `<p id="2"></p>`, + }, + }, + { + `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`, + `div table p`, + []string{ + `<p id="2"></p>`, + }, + }, + { + `<div><p id="1"><div><p id="2"></div><table><tr><td><p id="3"></table></div>`, + `div > p`, + []string{ + `<p id="1"></p>`, + `<p id="2"></p>`, + }, + }, + { + `<p id="1"><p id="2"></p><address></address><p id="3">`, + `p ~ p`, + []string{ + `<p id="2"></p>`, + `<p id="3"></p>`, + }, + }, + { + `<p id="1"></p> + <!--comment--> + <p id="2"></p><address></address><p id="3">`, + `p + p`, + []string{ + `<p id="2"></p>`, + }, + }, + { + `<ul><li></li><li></li></ul><p>`, + `li, p`, + []string{ + "<li></li>", + "<li></li>", + "<p></p>", + }, + }, + { + `<p id="1"><p id="2"></p><address></address><p id="3">`, + `p +/*This is a comment*/ p`, + []string{ + `<p id="2"></p>`, + }, + }, + { + `<p>Text block that <span>wraps inner text</span> and continues</p>`, + `p:contains("that wraps")`, + []string{ + `<p>Text block that <span>wraps inner text</span> and continues</p>`, + }, + }, + { + `<p>Text block that <span>wraps inner text</span> and continues</p>`, + `p:containsOwn("that wraps")`, + []string{}, + }, + { + `<p>Text block that <span>wraps inner text</span> and continues</p>`, + `:containsOwn("inner")`, + []string{ + `<span>wraps inner text</span>`, + }, + }, + { + `<p>Text block that <span>wraps inner text</span> and continues</p>`, + `p:containsOwn("block")`, + []string{ + `<p>Text block that 
<span>wraps inner text</span> and continues</p>`, + }, + }, + { + `<div id="d1"><p id="p1"><span>text content</span></p></div><div id="d2"/>`, + `div:has(#p1)`, + []string{ + `<div id="d1"><p id="p1"><span>text content</span></p></div>`, + }, + }, + { + `<div id="d1"><p id="p1"><span>contents 1</span></p></div> + <div id="d2"><p>contents <em>2</em></p></div>`, + `div:has(:containsOwn("2"))`, + []string{ + `<div id="d2"><p>contents <em>2</em></p></div>`, + }, + }, + { + `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div> + <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`, + `body :has(:containsOwn("2"))`, + []string{ + `<div id="d2"><p id="p2">contents <em>2</em></p></div>`, + `<p id="p2">contents <em>2</em></p>`, + }, + }, + { + `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div> + <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`, + `body :haschild(:containsOwn("2"))`, + []string{ + `<p id="p2">contents <em>2</em></p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:matches([\d])`, + []string{ + `<p id="p1">0123456789</p>`, + `<p id="p3">0123ABCD</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:matches([a-z])`, + []string{ + `<p id="p2">abcdef</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:matches([a-zA-Z])`, + []string{ + `<p id="p2">abcdef</p>`, + `<p id="p3">0123ABCD</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:matches([^\d])`, + []string{ + `<p id="p2">abcdef</p>`, + `<p id="p3">0123ABCD</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:matches(^(0|a))`, + []string{ + `<p id="p1">0123456789</p>`, + `<p id="p2">abcdef</p>`, + `<p id="p3">0123ABCD</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + 
`p:matches(^\d+$)`, + []string{ + `<p id="p1">0123456789</p>`, + }, + }, + { + `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`, + `p:not(:matches(^\d+$))`, + []string{ + `<p id="p2">abcdef</p>`, + `<p id="p3">0123ABCD</p>`, + }, + }, + { + `<div><p id="p1">01234<em>567</em>89</p><div>`, + `div :matchesOwn(^\d+$)`, + []string{ + `<p id="p1">01234<em>567</em>89</p>`, + `<em>567</em>`, + }, + }, + { + `<ul> + <li><a id="a1" href="http://www.google.com/finance"></a> + <li><a id="a2" href="http://finance.yahoo.com/"></a> + <li><a id="a2" href="http://finance.untrusted.com/"/> + <li><a id="a3" href="https://www.google.com/news"/> + <li><a id="a4" href="http://news.yahoo.com"/> + </ul>`, + `[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`, + []string{ + `<a id="a1" href="http://www.google.com/finance"></a>`, + `<a id="a2" href="http://finance.yahoo.com/"></a>`, + }, + }, + { + `<ul> + <li><a id="a1" href="http://www.google.com/finance"/> + <li><a id="a2" href="http://finance.yahoo.com/"/> + <li><a id="a3" href="https://www.google.com/news"></a> + <li><a id="a4" href="http://news.yahoo.com"/> + </ul>`, + `[href#=(^https:\/\/[^\/]*\/?news)]`, + []string{ + `<a id="a3" href="https://www.google.com/news"></a>`, + }, + }, + { + `<form> + <label>Username <input type="text" name="username" /></label> + <label>Password <input type="password" name="password" /></label> + <label>Country + <select name="country"> + <option value="ca">Canada</option> + <option value="us">United States</option> + </select> + </label> + <label>Bio <textarea name="bio"></textarea></label> + <button>Sign up</button> + </form>`, + `:input`, + []string{ + `<input type="text" name="username"/>`, + `<input type="password" name="password"/>`, + `<select name="country"> + <option value="ca">Canada</option> + <option value="us">United States</option> + </select>`, + `<textarea name="bio"></textarea>`, + `<button>Sign up</button>`, + }, + }, + { + 
`<html><head></head><body></body></html>`, + ":root", + []string{ + "<html><head></head><body></body></html>", + }, + }, + { + `<html><head></head><body></body></html>`, + "*:root", + []string{ + "<html><head></head><body></body></html>", + }, + }, + { + `<html><head></head><body></body></html>`, + "*:root:first-child", + []string{}, + }, + { + `<html><head></head><body></body></html>`, + "*:root:nth-child(1)", + []string{}, + }, + { + `<html><head></head><body><a href="http://www.foo.com"></a></body></html>`, + "a:not(:root)", + []string{ + `<a href="http://www.foo.com"></a>`, + }, + }, +} +
// TestSelectors walks every entry of selectorTests. For each entry it
// compiles the selector, parses the HTML input, and then checks two things:
//
//   - MatchAll returns exactly as many nodes as the entry expects, and the
//     nodeString form of each node equals the expected string at the same
//     index;
//   - MatchFirst returns nil when nothing is expected, and otherwise a node
//     whose nodeString form equals the first expected string.
//
// A failure in one entry is reported with t.Errorf and does not stop the
// remaining entries from running.
func TestSelectors(t *testing.T) {
	for _, tc := range selectorTests {
		sel, compileErr := Compile(tc.selector)
		if compileErr != nil {
			t.Errorf("error compiling %q: %s", tc.selector, compileErr)
			continue
		}

		root, parseErr := html.Parse(strings.NewReader(tc.HTML))
		if parseErr != nil {
			t.Errorf("error parsing %q: %s", tc.HTML, parseErr)
			continue
		}

		want := tc.results
		found := sel.MatchAll(root)
		if len(found) != len(want) {
			// The index-by-index comparison below needs equal lengths, so
			// report the count mismatch and move on to the next entry.
			t.Errorf("selector %s wanted %d elements, got %d instead", tc.selector, len(want), len(found))
			continue
		}

		for i := range found {
			if rendered := nodeString(found[i]); rendered != want[i] {
				t.Errorf("selector %s wanted %s, got %s instead", tc.selector, want[i], rendered)
			}
		}

		first := sel.MatchFirst(root)
		switch {
		case len(want) > 0:
			// NOTE(review): first is handed to nodeString without a nil
			// check — confirm nodeString tolerates a nil node.
			if rendered := nodeString(first); rendered != want[0] {
				t.Errorf("MatchFirst: selector %s want %s, got %s", tc.selector, want[0], rendered)
			}
		case first != nil:
			// Nothing was expected, yet MatchFirst produced a node.
			t.Errorf("MatchFirst: selector %s want nil, got %s", tc.selector, nodeString(first))
		}
	}
}