diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/xml/lex_test.go')
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/lex_test.go | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/xml/lex_test.go b/vendor/github.com/tdewolff/parse/xml/lex_test.go new file mode 100644 index 0000000..f8cdd17 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/lex_test.go @@ -0,0 +1,193 @@ +package xml // import "github.com/tdewolff/parse/xml" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t *testing.T) { + var tokenTests = []struct { + xml string + expected []TokenType + }{ + {"", TTs{}}, + {"<!-- comment -->", TTs{CommentToken}}, + {"<!-- comment \n multi \r line -->", TTs{CommentToken}}, + {"<foo/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo \t\r\n/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo:bar.qux-norf/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo></foo>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, + {"<foo>text</foo>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<foo/> text", TTs{StartTagToken, StartTagCloseVoidToken, TextToken}}, + {"<a> <b> <c>text</c> </b> </a>", TTs{StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken, TextToken, EndTagToken}}, + {"<foo a='a' b=\"b\" c=c/>", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a-b=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<?xml?>", TTs{StartTagPIToken, StartTagClosePIToken}}, + {"<?xml a=\"a\" ?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<?xml a=a?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<![CDATA[ test ]]>", TTs{CDATAToken}}, + {"<!DOCTYPE>", TTs{DOCTYPEToken}}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", TTs{DOCTYPEToken}}, + {`<!DOCTYPE note [<!ENTITY nbsp " "><!ENTITY writer "Writer: Donald Duck."><!ENTITY copyright "Copyright:]> W3Schools.">]>`, TTs{DOCTYPEToken}}, + {"<!foo>", TTs{StartTagToken, StartTagCloseToken}}, + + // early endings + {"<!-- comment", TTs{CommentToken}}, + {"<foo", TTs{StartTagToken}}, + {"</foo", TTs{EndTagToken}}, + {"<foo x", TTs{StartTagToken, AttributeToken}}, + {"<foo x=", TTs{StartTagToken, AttributeToken}}, + {"<foo x='", TTs{StartTagToken, AttributeToken}}, + {"<foo x=''", TTs{StartTagToken, AttributeToken}}, + {"<?xml", TTs{StartTagPIToken}}, + {"<![CDATA[ test", TTs{CDATAToken}}, + {"<!DOCTYPE note SYSTEM", TTs{DOCTYPEToken}}, + + // go fuzz + {"</", TTs{EndTagToken}}, + {"</\n", TTs{EndTagToken}}, + } + for _, tt := range tokenTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, token, tt.expected[i], "token types must match") + } + i++ + } + }) + } + + test.T(t, TokenType(100).String(), "Invalid(100)") +} + +func TestTags(t *testing.T) { + var tagTests = []struct { + xml string + expected string + }{ + {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"}, + {"<?xml?>", "xml"}, + {"<foo?bar/qux>", "foo?bar/qux"}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""}, + + // early endings + {"<foo ", "foo"}, + } + for _, tt := range tagTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.Fail(t, "when error occurred we must be at the end") + break + } else if token == StartTagToken || token == StartTagPIToken || token == EndTagToken || token == DOCTYPEToken { + test.String(t, string(l.Text()), tt.expected, "tags must match") + break + } + } + }) + } +} + +func TestAttributes(t *testing.T) { + var attributeTests = []struct { + attr string + expected []string + }{ + {"<foo a=\"b\" />", []string{"a", "\"b\""}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}}, + {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a b \""}}, + {"<?xml a=b?>", []string{"a", "b"}}, + {"<foo /=? >", []string{"/", "?"}}, + + // early endings + {"<foo x", []string{"x", ""}}, + {"<foo x=", []string{"x", ""}}, + {"<foo x='", []string{"x", "'"}}, + } + for _, tt := range attributeTests { + t.Run(tt.attr, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.attr)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == AttributeToken { + test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected)) + if i+1 < len(tt.expected) { + test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match") + test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match") + i += 2 + } + } + } + }) + } +} + +func TestErrors(t *testing.T) { + var errorTests = []struct { + xml string + col int + }{ + {"a\x00b", 2}, + } + for _, tt := range errorTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + if tt.col == 0 { + test.T(t, l.Err(), io.EOF) + } else if perr, ok := l.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", l.Err()) + } + break + } + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewLexer() { + l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>")) + out := "" + for { + tt, data := l.Next() + if tt == ErrorToken { + break + } + out += string(data) + } + fmt.Println(out) + // Output: <span class='user'>John Doe</span> +} |