about | summary | refs | log | tree | commit | diff | homepage
path: root/vendor/github.com/tdewolff/parse/html/lex_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/tdewolff/parse/html/lex_test.go')
-rw-r--r-- vendor/github.com/tdewolff/parse/html/lex_test.go 262
1 files changed, 262 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/html/lex_test.go b/vendor/github.com/tdewolff/parse/html/lex_test.go
new file mode 100644
index 0000000..5f4ca0b
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/lex_test.go
@@ -0,0 +1,262 @@
+package html // import "github.com/tdewolff/parse/html"
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "testing"
+
+ "github.com/tdewolff/parse"
+ "github.com/tdewolff/test"
+)
+
+// TTs is a shorthand for []TokenType; the test tables use it to write the
+// expected token sequences compactly.
+type TTs []TokenType
+
+func TestTokens(t *testing.T) {
+ var tokenTests = []struct {
+ html string
+ expected []TokenType
+ }{
+ {"<html></html>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
+ {"<img/>", TTs{StartTagToken, StartTagVoidToken}},
+ {"<!-- comment -->", TTs{CommentToken}},
+ {"<!-- comment --!>", TTs{CommentToken}},
+ {"<p>text</p>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<input type='button'/>", TTs{StartTagToken, AttributeToken, StartTagVoidToken}},
+ {"<input type='button' value=''/>", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagVoidToken}},
+ {"<input type='=/>' \r\n\t\f value=\"'\" name=x checked />", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, AttributeToken, StartTagVoidToken}},
+ {"<!doctype>", TTs{DoctypeToken}},
+ {"<!doctype html>", TTs{DoctypeToken}},
+ {"<?bogus>", TTs{CommentToken}},
+ {"</0bogus>", TTs{CommentToken}},
+ {"<!bogus>", TTs{CommentToken}},
+ {"< ", TTs{TextToken}},
+ {"</", TTs{TextToken}},
+
+ // raw tags
+ {"<title><p></p></title>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<TITLE><p></p></TITLE>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<plaintext></plaintext>", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+ {"<script></script>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
+ {"<script>var x='</script>';</script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
+ {"<script><!--var x='</script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
+ {"<script><!--var x='<script></script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<script><!--var x='<script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
+ {"<![CDATA[ test ]]>", TTs{TextToken}},
+ {"<svg>text</svg>", TTs{SvgToken}},
+ {"<math>text</math>", TTs{MathToken}},
+ {`<svg>text<x a="</svg>"></x></svg>`, TTs{SvgToken}},
+ {"<a><svg>text</svg></a>", TTs{StartTagToken, StartTagCloseToken, SvgToken, EndTagToken}},
+
+ // early endings
+ {"<!-- comment", TTs{CommentToken}},
+ {"<? bogus comment", TTs{CommentToken}},
+ {"<foo", TTs{StartTagToken}},
+ {"</foo", TTs{EndTagToken}},
+ {"<foo x", TTs{StartTagToken, AttributeToken}},
+ {"<foo x=", TTs{StartTagToken, AttributeToken}},
+ {"<foo x='", TTs{StartTagToken, AttributeToken}},
+ {"<foo x=''", TTs{StartTagToken, AttributeToken}},
+ {"<!DOCTYPE note SYSTEM", TTs{DoctypeToken}},
+ {"<![CDATA[ test", TTs{TextToken}},
+ {"<script>", TTs{StartTagToken, StartTagCloseToken}},
+ {"<script><!--", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+ {"<script><!--var x='<script></script>';-->", TTs{StartTagToken, StartTagCloseToken, TextToken}},
+
+ // go-fuzz
+ {"</>", TTs{EndTagToken}},
+ }
+ for _, tt := range tokenTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ i := 0
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken {
+ test.T(t, l.Err(), io.EOF)
+ test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
+ break
+ }
+ test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected))
+ if i < len(tt.expected) {
+ test.T(t, token, tt.expected[i], "token types must match")
+ }
+ i++
+ }
+ })
+ }
+
+ test.T(t, TokenType(100).String(), "Invalid(100)")
+}
+
+func TestTags(t *testing.T) {
+ var tagTests = []struct {
+ html string
+ expected string
+ }{
+ {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"},
+ {"<foo?bar/qux>", "foo?bar/qux"},
+ {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""},
+ {"</foo >", "foo"},
+
+ // early endings
+ {"<foo ", "foo"},
+ }
+ for _, tt := range tagTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken {
+ test.T(t, l.Err(), io.EOF)
+ test.Fail(t, "when error occurred we must be at the end")
+ break
+ } else if token == StartTagToken || token == EndTagToken || token == DoctypeToken {
+ test.String(t, string(l.Text()), tt.expected)
+ break
+ }
+ }
+ })
+ }
+}
+
+// TestAttributes lexes each snippet and, for every AttributeToken, compares
+// the attribute key (l.Text()) and value (l.AttrVal()) with the expected
+// list. expected is a flat list of key/value pairs: even indices hold keys,
+// odd indices hold the corresponding values, which keep their surrounding
+// quotes and are empty when the attribute has no value.
+func TestAttributes(t *testing.T) {
+	var attributeTests = []struct {
+		attr     string
+		expected []string
+	}{
+		{"<foo a=\"b\" />", []string{"a", "\"b\""}},
+		{"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}},
+		{"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a \n\t\r b \""}},
+		{"<foo a/>", []string{"a", ""}},
+		{"<foo /=/>", []string{"/", "/"}},
+
+		// early endings
+		{"<foo x", []string{"x", ""}},
+		{"<foo x=", []string{"x", ""}},
+		{"<foo x='", []string{"x", "'"}},
+	}
+	for _, tt := range attributeTests {
+		t.Run(tt.attr, func(t *testing.T) {
+			l := NewLexer(bytes.NewBufferString(tt.attr))
+			// i indexes the key of the next expected pair; it advances by two
+			// per attribute so that it equals len(tt.expected) when every
+			// expected pair has been consumed.
+			i := 0
+			for {
+				token, _ := l.Next()
+				if token == ErrorToken {
+					test.T(t, l.Err(), io.EOF)
+					test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
+					break
+				} else if token == AttributeToken {
+					test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected))
+					if i+1 < len(tt.expected) {
+						test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match")
+						// Use a distinct message for the value so a failure
+						// names the part of the attribute that mismatched.
+						test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute values must match")
+						i += 2
+					}
+				}
+			}
+		})
+	}
+}
+
+func TestErrors(t *testing.T) {
+ var errorTests = []struct {
+ html string
+ col int
+ }{
+ {"a\x00b", 2},
+ }
+ for _, tt := range errorTests {
+ t.Run(tt.html, func(t *testing.T) {
+ l := NewLexer(bytes.NewBufferString(tt.html))
+ for {
+ token, _ := l.Next()
+ if token == ErrorToken {
+ if tt.col == 0 {
+ test.T(t, l.Err(), io.EOF)
+ } else if perr, ok := l.Err().(*parse.Error); ok {
+ test.T(t, perr.Col, tt.col)
+ } else {
+ test.Fail(t, "bad error:", l.Err())
+ }
+ break
+ }
+ }
+ })
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
+// J is a package-level sink: each whitespace benchmark adds its computed
+// counts to it, presumably so the compiler cannot discard the measured loops.
+var J int
+// ss holds the byte slices scanned by the whitespace benchmarks. Every entry
+// contains at least one non-whitespace byte; the benchmarks rely on that to
+// stop scanning, since they index without a length check.
+// NOTE(review): the first and fourth entries are identical as written —
+// confirm whether their leading whitespace was meant to differ.
+var ss = [][]byte{
+ []byte(" style"),
+ []byte("style"),
+ []byte(" \r\n\tstyle"),
+ []byte(" style"),
+ []byte(" x"),
+ []byte("x"),
+}
+
+// BenchmarkWhitespace1 measures counting leading whitespace bytes (' ', '\t',
+// '\n', '\r', '\f') in each entry of ss, written as an if/else: increment on
+// whitespace, break otherwise. The per-entry count is added to J.
+func BenchmarkWhitespace1(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ // s[j] is read without a length check, so s must contain a
+ // non-whitespace byte or this indexes past the end.
+ if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ j++
+ } else {
+ break
+ }
+ }
+ J += j
+ }
+ }
+}
+
+// BenchmarkWhitespace2 measures the same leading-whitespace count as
+// BenchmarkWhitespace1, written as increment-and-continue inside the if with
+// an unconditional break after it. The per-entry count is added to J.
+func BenchmarkWhitespace2(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ // s[j] is read without a length check, so s must contain a
+ // non-whitespace byte or this indexes past the end.
+ if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
+ j++
+ continue
+ }
+ break
+ }
+ J += j
+ }
+ }
+}
+
+// BenchmarkWhitespace3 measures the same leading-whitespace count as the
+// other two whitespace benchmarks, written with the negated condition:
+// break on the first non-whitespace byte, otherwise increment. The per-entry
+// count is added to J.
+func BenchmarkWhitespace3(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for _, s := range ss {
+ j := 0
+ for {
+ // s[j] is read without a length check, so s must contain a
+ // non-whitespace byte or this indexes past the end.
+ if c := s[j]; c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f' {
+ break
+ }
+ j++
+ }
+ J += j
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
+func ExampleNewLexer() {
+ l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>"))
+ out := ""
+ for {
+ tt, data := l.Next()
+ if tt == ErrorToken {
+ break
+ }
+ out += string(data)
+ }
+ fmt.Println(out)
+ // Output: <span class='user'>John Doe</span>
+}