diff options
author | Frédéric Guillot <fred@miniflux.net> | 2017-11-19 21:10:04 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@miniflux.net> | 2017-11-19 22:01:46 -0800 |
commit | 8ffb773f43c8dc54801ca1d111854e7e881c93c9 (patch) | |
tree | 38133a2fc612597a75fed1d13e5b4042f58a2b7e /vendor/github.com/tdewolff/parse/xml |
First commit
Diffstat (limited to 'vendor/github.com/tdewolff/parse/xml')
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/README.md | 101 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/lex.go | 345 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/lex_test.go | 193 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/util.go | 108 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/xml/util_test.go | 63 |
5 files changed, 810 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/xml/README.md b/vendor/github.com/tdewolff/parse/xml/README.md new file mode 100644 index 0000000..3aaf6f4 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/README.md @@ -0,0 +1,101 @@ +# XML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/xml?status.svg)](http://godoc.org/github.com/tdewolff/parse/xml) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/xml)](http://gocover.io/github.com/tdewolff/parse/xml) + +This package is an XML lexer written in [Go][1]. It follows the specification at [Extensible Markup Language (XML) 1.0 (Fifth Edition)](http://www.w3.org/TR/REC-xml/). The lexer takes an io.Reader and converts it into tokens until the EOF. + +## Installation +Run the following command + + go get github.com/tdewolff/parse/xml + +or add the following import and run project with `go get` + + import "github.com/tdewolff/parse/xml" + +## Lexer +### Usage +The following initializes a new Lexer with io.Reader `r`: +``` go +l := xml.NewLexer(r) +``` + +To tokenize until EOF an error, use: +``` go +for { + tt, data := l.Next() + switch tt { + case xml.ErrorToken: + // error or EOF set in l.Err() + return + case xml.StartTagToken: + // ... + for { + ttAttr, dataAttr := l.Next() + if ttAttr != xml.AttributeToken { + // handle StartTagCloseToken/StartTagCloseVoidToken/StartTagClosePIToken + break + } + // ... + } + case xml.EndTagToken: + // ... + } +} +``` + +All tokens: +``` go +ErrorToken TokenType = iota // extra token when errors occur +CommentToken +CDATAToken +StartTagToken +StartTagCloseToken +StartTagCloseVoidToken +StartTagClosePIToken +EndTagToken +AttributeToken +TextToken +``` + +### Examples +``` go +package main + +import ( + "os" + + "github.com/tdewolff/parse/xml" +) + +// Tokenize XML from stdin. 
+func main() { + l := xml.NewLexer(os.Stdin) + for { + tt, data := l.Next() + switch tt { + case xml.ErrorToken: + if l.Err() != io.EOF { + fmt.Println("Error on line", l.Line(), ":", l.Err()) + } + return + case xml.StartTagToken: + fmt.Println("Tag", string(data)) + for { + ttAttr, dataAttr := l.Next() + if ttAttr != xml.AttributeToken { + break + } + + key := dataAttr + val := l.AttrVal() + fmt.Println("Attribute", string(key), "=", string(val)) + } + // ... + } + } +} +``` + +## License +Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). + +[1]: http://golang.org/ "Go Language" diff --git a/vendor/github.com/tdewolff/parse/xml/lex.go b/vendor/github.com/tdewolff/parse/xml/lex.go new file mode 100644 index 0000000..0f1393c --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/lex.go @@ -0,0 +1,345 @@ +// Package xml is an XML1.0 lexer following the specifications at http://www.w3.org/TR/xml/. +package xml // import "github.com/tdewolff/parse/xml" + +import ( + "io" + "strconv" + + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/buffer" +) + +// TokenType determines the type of token, eg. a number or a semicolon. +type TokenType uint32 + +// TokenType values. +const ( + ErrorToken TokenType = iota // extra token when errors occur + CommentToken + DOCTYPEToken + CDATAToken + StartTagToken + StartTagPIToken + StartTagCloseToken + StartTagCloseVoidToken + StartTagClosePIToken + EndTagToken + AttributeToken + TextToken +) + +// String returns the string representation of a TokenType. 
func (tt TokenType) String() string {
	switch tt {
	case ErrorToken:
		return "Error"
	case CommentToken:
		return "Comment"
	case DOCTYPEToken:
		return "DOCTYPE"
	case CDATAToken:
		return "CDATA"
	case StartTagToken:
		return "StartTag"
	case StartTagPIToken:
		return "StartTagPI"
	case StartTagCloseToken:
		return "StartTagClose"
	case StartTagCloseVoidToken:
		return "StartTagCloseVoid"
	case StartTagClosePIToken:
		return "StartTagClosePI"
	case EndTagToken:
		return "EndTag"
	case AttributeToken:
		return "Attribute"
	case TextToken:
		return "Text"
	}
	// Fallback for values outside the declared token constants.
	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
}

////////////////////////////////////////////////////////////////

// Lexer is the state for the lexer.
type Lexer struct {
	r   *buffer.Lexer
	err error

	inTag bool // true after a StartTagToken/StartTagPIToken until the tag's closing token; Next then expects attributes

	text    []byte // textual content of the most recent token, without delimiters (see Text)
	attrVal []byte // value of the most recent AttributeToken, including quotes if present (see AttrVal)
}

// NewLexer returns a new Lexer for a given io.Reader.
func NewLexer(r io.Reader) *Lexer {
	return &Lexer{
		r: buffer.NewLexer(r),
	}
}

// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
func (l *Lexer) Err() error {
	// The buffered reader's error (e.g. io.EOF) takes precedence over a lexing error.
	err := l.r.Err()
	if err != nil {
		return err
	}
	return l.err
}

// Restore restores the NULL byte at the end of the buffer.
func (l *Lexer) Restore() {
	l.r.Restore()
}

// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (l *Lexer) Next() (TokenType, []byte) {
	l.text = nil
	var c byte
	if l.inTag {
		// Inside a tag: emit AttributeTokens until the tag-closing sequence
		// ('>', '/>' or '?>') is found.
		l.attrVal = nil
		for { // before attribute name state
			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' {
				l.r.Move(1)
				continue
			}
			break
		}
		if c == 0 {
			// NULL marks the end of the buffer (buffer.Lexer convention).
			l.err = parse.NewErrorLexer("unexpected null character", l.r)
			return ErrorToken, nil
		} else if c != '>' && (c != '/' && c != '?' || l.r.Peek(1) != '>') {
			return AttributeToken, l.shiftAttribute()
		}
		start := l.r.Pos()
		l.inTag = false
		if c == '/' {
			l.r.Move(2) // consume "/>"
			l.text = l.r.Lexeme()[start:]
			return StartTagCloseVoidToken, l.r.Shift()
		} else if c == '?' {
			l.r.Move(2) // consume "?>"
			l.text = l.r.Lexeme()[start:]
			return StartTagClosePIToken, l.r.Shift()
		} else {
			l.r.Move(1) // consume ">"
			l.text = l.r.Lexeme()[start:]
			return StartTagCloseToken, l.r.Shift()
		}
	}

	// Outside a tag: scan text until '<' or end of buffer.
	for {
		c = l.r.Peek(0)
		if c == '<' {
			if l.r.Pos() > 0 {
				// Emit pending text before handling the markup.
				return TextToken, l.r.Shift()
			}
			c = l.r.Peek(1)
			if c == '/' {
				l.r.Move(2)
				return EndTagToken, l.shiftEndTag()
			} else if c == '!' {
				l.r.Move(2)
				if l.at('-', '-') {
					l.r.Move(2)
					return CommentToken, l.shiftCommentText()
				} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
					l.r.Move(7)
					return CDATAToken, l.shiftCDATAText()
				} else if l.at('D', 'O', 'C', 'T', 'Y', 'P', 'E') {
					l.r.Move(8)
					return DOCTYPEToken, l.shiftDOCTYPEText()
				}
				// Unknown "<!..." markup: treat it as an ordinary start tag.
				l.r.Move(-2)
			} else if c == '?' {
				l.r.Move(2)
				l.inTag = true
				return StartTagPIToken, l.shiftStartTag()
			}
			l.r.Move(1)
			l.inTag = true
			return StartTagToken, l.shiftStartTag()
		} else if c == 0 {
			if l.r.Pos() > 0 {
				return TextToken, l.r.Shift()
			}
			l.err = parse.NewErrorLexer("unexpected null character", l.r)
			return ErrorToken, nil
		}
		l.r.Move(1)
	}
}

// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
func (l *Lexer) Text() []byte {
	return l.text
}

// AttrVal returns the attribute value when an AttributeToken was returned from Next.
+func (l *Lexer) AttrVal() []byte { + return l.attrVal +} + +//////////////////////////////////////////////////////////////// + +// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html + +func (l *Lexer) shiftDOCTYPEText() []byte { + inString := false + inBrackets := false + for { + c := l.r.Peek(0) + if c == '"' { + inString = !inString + } else if (c == '[' || c == ']') && !inString { + inBrackets = (c == '[') + } else if c == '>' && !inString && !inBrackets { + l.text = l.r.Lexeme()[9:] + l.r.Move(1) + return l.r.Shift() + } else if c == 0 { + l.text = l.r.Lexeme()[9:] + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftCDATAText() []byte { + for { + c := l.r.Peek(0) + if c == ']' && l.r.Peek(1) == ']' && l.r.Peek(2) == '>' { + l.text = l.r.Lexeme()[9:] + l.r.Move(3) + return l.r.Shift() + } else if c == 0 { + l.text = l.r.Lexeme()[9:] + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftCommentText() []byte { + for { + c := l.r.Peek(0) + if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + l.text = l.r.Lexeme()[4:] + l.r.Move(3) + return l.r.Shift() + } else if c == 0 { + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftStartTag() []byte { + nameStart := l.r.Pos() + for { + if c := l.r.Peek(0); c == ' ' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + l.text = l.r.Lexeme()[nameStart:] + return l.r.Shift() +} + +func (l *Lexer) shiftAttribute() []byte { + nameStart := l.r.Pos() + var c byte + for { // attribute name state + if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + nameEnd := l.r.Pos() + for { // after attribute name state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' { + l.r.Move(1) + 
continue + } + break + } + if c == '=' { + l.r.Move(1) + for { // before attribute value state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' { + l.r.Move(1) + continue + } + break + } + attrPos := l.r.Pos() + delim := c + if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state + l.r.Move(1) + for { + c = l.r.Peek(0) + if c == delim { + l.r.Move(1) + break + } else if c == 0 { + break + } + l.r.Move(1) + if c == '\t' || c == '\n' || c == '\r' { + l.r.Lexeme()[l.r.Pos()-1] = ' ' + } + } + } else { // attribute value unquoted state + for { + if c = l.r.Peek(0); c == ' ' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + } + l.attrVal = l.r.Lexeme()[attrPos:] + } else { + l.r.Rewind(nameEnd) + l.attrVal = nil + } + l.text = l.r.Lexeme()[nameStart:nameEnd] + return l.r.Shift() +} + +func (l *Lexer) shiftEndTag() []byte { + for { + c := l.r.Peek(0) + if c == '>' { + l.text = l.r.Lexeme()[2:] + l.r.Move(1) + break + } else if c == 0 { + l.text = l.r.Lexeme()[2:] + break + } + l.r.Move(1) + } + + end := len(l.text) + for end > 0 { + if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' { + end-- + continue + } + break + } + l.text = l.text[:end] + return l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +func (l *Lexer) at(b ...byte) bool { + for i, c := range b { + if l.r.Peek(i) != c { + return false + } + } + return true +} diff --git a/vendor/github.com/tdewolff/parse/xml/lex_test.go b/vendor/github.com/tdewolff/parse/xml/lex_test.go new file mode 100644 index 0000000..f8cdd17 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/lex_test.go @@ -0,0 +1,193 @@ +package xml // import "github.com/tdewolff/parse/xml" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t 
*testing.T) { + var tokenTests = []struct { + xml string + expected []TokenType + }{ + {"", TTs{}}, + {"<!-- comment -->", TTs{CommentToken}}, + {"<!-- comment \n multi \r line -->", TTs{CommentToken}}, + {"<foo/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo \t\r\n/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo:bar.qux-norf/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo></foo>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, + {"<foo>text</foo>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<foo/> text", TTs{StartTagToken, StartTagCloseVoidToken, TextToken}}, + {"<a> <b> <c>text</c> </b> </a>", TTs{StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken, TextToken, EndTagToken}}, + {"<foo a='a' b=\"b\" c=c/>", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a-b=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<?xml?>", TTs{StartTagPIToken, StartTagClosePIToken}}, + {"<?xml a=\"a\" ?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<?xml a=a?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<![CDATA[ test ]]>", TTs{CDATAToken}}, + {"<!DOCTYPE>", TTs{DOCTYPEToken}}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", TTs{DOCTYPEToken}}, + {`<!DOCTYPE note [<!ENTITY nbsp " "><!ENTITY writer "Writer: Donald Duck."><!ENTITY copyright "Copyright:]> W3Schools.">]>`, TTs{DOCTYPEToken}}, + {"<!foo>", TTs{StartTagToken, StartTagCloseToken}}, + + // early endings + {"<!-- comment", TTs{CommentToken}}, + {"<foo", TTs{StartTagToken}}, + {"</foo", TTs{EndTagToken}}, + {"<foo x", 
TTs{StartTagToken, AttributeToken}}, + {"<foo x=", TTs{StartTagToken, AttributeToken}}, + {"<foo x='", TTs{StartTagToken, AttributeToken}}, + {"<foo x=''", TTs{StartTagToken, AttributeToken}}, + {"<?xml", TTs{StartTagPIToken}}, + {"<![CDATA[ test", TTs{CDATAToken}}, + {"<!DOCTYPE note SYSTEM", TTs{DOCTYPEToken}}, + + // go fuzz + {"</", TTs{EndTagToken}}, + {"</\n", TTs{EndTagToken}}, + } + for _, tt := range tokenTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, token, tt.expected[i], "token types must match") + } + i++ + } + }) + } + + test.T(t, TokenType(100).String(), "Invalid(100)") +} + +func TestTags(t *testing.T) { + var tagTests = []struct { + xml string + expected string + }{ + {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"}, + {"<?xml?>", "xml"}, + {"<foo?bar/qux>", "foo?bar/qux"}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""}, + + // early endings + {"<foo ", "foo"}, + } + for _, tt := range tagTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.Fail(t, "when error occurred we must be at the end") + break + } else if token == StartTagToken || token == StartTagPIToken || token == EndTagToken || token == DOCTYPEToken { + test.String(t, string(l.Text()), tt.expected, "tags must match") + break + } + } + }) + } +} + +func TestAttributes(t *testing.T) { + var attributeTests = []struct { + attr string + expected []string + }{ + {"<foo a=\"b\" />", []string{"a", "\"b\""}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", 
[]string{"checked", "", "value", "'=/>\"'"}}, + {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a b \""}}, + {"<?xml a=b?>", []string{"a", "b"}}, + {"<foo /=? >", []string{"/", "?"}}, + + // early endings + {"<foo x", []string{"x", ""}}, + {"<foo x=", []string{"x", ""}}, + {"<foo x='", []string{"x", "'"}}, + } + for _, tt := range attributeTests { + t.Run(tt.attr, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.attr)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == AttributeToken { + test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected)) + if i+1 < len(tt.expected) { + test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match") + test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match") + i += 2 + } + } + } + }) + } +} + +func TestErrors(t *testing.T) { + var errorTests = []struct { + xml string + col int + }{ + {"a\x00b", 2}, + } + for _, tt := range errorTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + if tt.col == 0 { + test.T(t, l.Err(), io.EOF) + } else if perr, ok := l.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", l.Err()) + } + break + } + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewLexer() { + l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>")) + out := "" + for { + tt, data := l.Next() + if tt == ErrorToken { + break + } + out += string(data) + } + fmt.Println(out) + // Output: <span class='user'>John Doe</span> +} diff --git a/vendor/github.com/tdewolff/parse/xml/util.go b/vendor/github.com/tdewolff/parse/xml/util.go new file mode 100644 index 
0000000..1501b9b --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/util.go @@ -0,0 +1,108 @@ +package xml // import "github.com/tdewolff/parse/xml" + +import "github.com/tdewolff/parse" + +var ( + ltEntityBytes = []byte("<") + ampEntityBytes = []byte("&") + singleQuoteEntityBytes = []byte("'") + doubleQuoteEntityBytes = []byte(""") +) + +// EscapeAttrVal returns the escape attribute value bytes without quotes. +func EscapeAttrVal(buf *[]byte, b []byte) []byte { + singles := 0 + doubles := 0 + for i, c := range b { + if c == '&' { + if quote, n := parse.QuoteEntity(b[i:]); n > 0 { + if quote == '"' { + doubles++ + } else { + singles++ + } + } + } else if c == '"' { + doubles++ + } else if c == '\'' { + singles++ + } + } + + n := len(b) + 2 + var quote byte + var escapedQuote []byte + if doubles > singles { + n += singles * 4 + quote = '\'' + escapedQuote = singleQuoteEntityBytes + } else { + n += doubles * 4 + quote = '"' + escapedQuote = doubleQuoteEntityBytes + } + if n > cap(*buf) { + *buf = make([]byte, 0, n) // maximum size, not actual size + } + t := (*buf)[:n] // maximum size, not actual size + t[0] = quote + j := 1 + start := 0 + for i, c := range b { + if c == '&' { + if entityQuote, n := parse.QuoteEntity(b[i:]); n > 0 { + j += copy(t[j:], b[start:i]) + if entityQuote != quote { + t[j] = entityQuote + j++ + } else { + j += copy(t[j:], escapedQuote) + } + start = i + n + } + } else if c == quote { + j += copy(t[j:], b[start:i]) + j += copy(t[j:], escapedQuote) + start = i + 1 + } + } + j += copy(t[j:], b[start:]) + t[j] = quote + return t[:j+1] +} + +// EscapeCDATAVal returns the escaped text bytes. 
+func EscapeCDATAVal(buf *[]byte, b []byte) ([]byte, bool) { + n := 0 + for _, c := range b { + if c == '<' || c == '&' { + if c == '<' { + n += 3 // < + } else { + n += 4 // & + } + if n > len("<![CDATA[]]>") { + return b, false + } + } + } + if len(b)+n > cap(*buf) { + *buf = make([]byte, 0, len(b)+n) + } + t := (*buf)[:len(b)+n] + j := 0 + start := 0 + for i, c := range b { + if c == '<' { + j += copy(t[j:], b[start:i]) + j += copy(t[j:], ltEntityBytes) + start = i + 1 + } else if c == '&' { + j += copy(t[j:], b[start:i]) + j += copy(t[j:], ampEntityBytes) + start = i + 1 + } + } + j += copy(t[j:], b[start:]) + return t[:j], true +} diff --git a/vendor/github.com/tdewolff/parse/xml/util_test.go b/vendor/github.com/tdewolff/parse/xml/util_test.go new file mode 100644 index 0000000..65be6b8 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/util_test.go @@ -0,0 +1,63 @@ +package xml // import "github.com/tdewolff/parse/xml" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestEscapeAttrVal(t *testing.T) { + var attrValTests = []struct { + attrVal string + expected string + }{ + {"xyz", "\"xyz\""}, + {"", "\"\""}, + {"x&z", "\"x&z\""}, + {"x'z", "\"x'z\""}, + {"x\"z", "'x\"z'"}, + {"a'b=\"\"", "'a'b=\"\"'"}, + {"'x'\"'z'", "\"x'"'z\""}, + {"\"x"'"z\"", "'x\"'\"z'"}, + {"a'b=\"\"", "'a'b=\"\"'"}, + } + var buf []byte + for _, tt := range attrValTests { + t.Run(tt.attrVal, func(t *testing.T) { + b := []byte(tt.attrVal) + if len(b) > 1 && (b[0] == '"' || b[0] == '\'') && b[0] == b[len(b)-1] { + b = b[1 : len(b)-1] + } + val := EscapeAttrVal(&buf, []byte(b)) + test.String(t, string(val), tt.expected) + }) + } +} + +func TestEscapeCDATAVal(t *testing.T) { + var CDATAValTests = []struct { + CDATAVal string + expected string + }{ + {"<![CDATA[<b>]]>", "<b>"}, + {"<![CDATA[abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz]]>", "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"}, + {"<![CDATA[ 
<b> ]]>", " <b> "}, + {"<![CDATA[<<<<<]]>", "<![CDATA[<<<<<]]>"}, + {"<![CDATA[&]]>", "&"}, + {"<![CDATA[&&&&]]>", "<![CDATA[&&&&]]>"}, + {"<![CDATA[ a ]]>", " a "}, + {"<![CDATA[]]>", ""}, + } + var buf []byte + for _, tt := range CDATAValTests { + t.Run(tt.CDATAVal, func(t *testing.T) { + b := []byte(tt.CDATAVal[len("<![CDATA[") : len(tt.CDATAVal)-len("]]>")]) + data, useText := EscapeCDATAVal(&buf, b) + text := string(data) + if !useText { + text = "<![CDATA[" + text + "]]>" + } + test.String(t, text, tt.expected) + }) + } +} |