Diffstat (limited to 'vendor/github.com/tdewolff/parse')
54 files changed, 9537 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/.travis.yml b/vendor/github.com/tdewolff/parse/.travis.yml new file mode 100644 index 0000000..a24cef7 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/.travis.yml @@ -0,0 +1,5 @@ +language: go +before_install: + - go get github.com/mattn/goveralls +script: + - goveralls -v -service travis-ci -repotoken $COVERALLS_TOKEN || go test -v ./...
\ No newline at end of file diff --git a/vendor/github.com/tdewolff/parse/LICENSE.md b/vendor/github.com/tdewolff/parse/LICENSE.md new file mode 100644 index 0000000..41677de --- /dev/null +++ b/vendor/github.com/tdewolff/parse/LICENSE.md @@ -0,0 +1,22 @@ +Copyright (c) 2015 Taco de Wolff + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/vendor/github.com/tdewolff/parse/README.md b/vendor/github.com/tdewolff/parse/README.md
new file mode 100644
index 0000000..f6452a7
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/README.md
@@ -0,0 +1,66 @@
+# Parse [![Build Status](https://travis-ci.org/tdewolff/parse.svg?branch=master)](https://travis-ci.org/tdewolff/parse) [![GoDoc](http://godoc.org/github.com/tdewolff/parse?status.svg)](http://godoc.org/github.com/tdewolff/parse) [![Coverage Status](https://coveralls.io/repos/github/tdewolff/parse/badge.svg?branch=master)](https://coveralls.io/github/tdewolff/parse?branch=master)
+
+This package contains several lexers and parsers written in [Go][1]. All subpackages are built to be streaming, high-performance and in accordance with the official (latest) specifications.
+
+The lexers are implemented using `buffer.Lexer` in https://github.com/tdewolff/parse/buffer and the parsers work on top of the lexers. Some subpackages have hashes defined (using [Hasher](https://github.com/tdewolff/hasher)) that speed up common byte-slice comparisons.
+
+## Buffer
+### Reader
+Reader is a wrapper around a `[]byte` that implements the `io.Reader` interface. It is a much thinner layer than `bytes.Buffer` provides and is therefore faster.
+
+### Writer
+Writer is a buffer that implements the `io.Writer` interface. It is a much thinner layer than `bytes.Buffer` provides and is therefore faster. It will expand the buffer when needed.
+
+The reset functionality allows for better memory reuse: after calling `Reset`, subsequent writes overwrite the current buffer and thus reduce allocations.
+
+### Lexer
+Lexer is a read buffer specifically designed for building lexers. It keeps track of two positions: a start and an end position. The start position marks the beginning of the token currently being parsed; the end position is moved forward until a valid token is found. Calling `Shift` will collapse the positions to the end and return the parsed `[]byte`.
+
+The end position is moved with `Move(int)`, which also accepts negative integers. One can save a position with `Pos() int` before trying to parse a token, and rewind with `Rewind(int)`, passing the previously saved position, if parsing fails.
+
+`Peek(int) byte` will peek forward (relative to the end position) and return the byte at that location. `PeekRune(int) (rune, int)` returns the UTF-8 rune and its byte length at the given **byte** position. Upon an error `Peek` will return `0`; the **user must peek at every character** and not skip any, otherwise it may skip a `0` and panic on out-of-bounds indexing.
+
+`Lexeme() []byte` will return the currently selected bytes, and `Skip()` will collapse the selection. `Shift() []byte` is a combination of `Lexeme() []byte` and `Skip()`.
+
+When the passed `io.Reader` returns an error, `Err() error` will return that error even if the end of the buffer has not been reached.
+
+### StreamLexer
+StreamLexer behaves like Lexer but uses a buffer pool to read in chunks from `io.Reader`, retaining old buffers in memory that are still in use, and re-using old buffers otherwise. Calling `Free(n int)` frees up `n` bytes from the internal buffer(s). It holds an array of buffers to accommodate keeping everything in memory. Calling `ShiftLen() int` returns the number of bytes that have been shifted since the previous call to `ShiftLen`, which can be used to specify how many bytes need to be freed up from the buffer. If you don't need to keep returned byte slices around, call `Free(ShiftLen())` after every `Shift` call.
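+
+For illustration, a minimal program that splits its input into space-separated words using `Lexer` (a sketch, not part of the package; error handling kept minimal):
+``` go
+package main
+
+import (
+	"fmt"
+
+	"github.com/tdewolff/parse/buffer"
+)
+
+func main() {
+	z := buffer.NewLexerBytes([]byte("Lorem ipsum dolor"))
+	for {
+		switch c := z.Peek(0); {
+		case c == ' ':
+			fmt.Printf("%q\n", z.Shift()) // emit the word collected so far
+			z.Move(1)                     // move past the space
+			z.Skip()                      // and drop it from the selection
+		case c == 0: // NULL byte: end of buffer or read error, see z.Err()
+			if word := z.Shift(); len(word) > 0 {
+				fmt.Printf("%q\n", word)
+			}
+			return
+		default:
+			z.Move(1)
+		}
+	}
+}
+```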
+
+## Strconv
+This package contains string conversion functions, much like the standard library's `strconv` package, but specifically tailored for the performance needs of the `minify` package.
+
+For example, the floating-point to string conversion function is approximately twice as fast as the standard library's, but it is not as precise.
+
+## CSS
+This package is a CSS3 lexer and parser. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until the EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to return grammar units until the EOF.
+
+[See README here](https://github.com/tdewolff/parse/tree/master/css).
+
+## HTML
+This package is an HTML5 lexer. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
+
+[See README here](https://github.com/tdewolff/parse/tree/master/html).
+
+## JS
+This package is a JS lexer (ECMA-262, edition 6.0). It follows the specification at [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/). The lexer takes an io.Reader and converts it into tokens until the EOF.
+
+[See README here](https://github.com/tdewolff/parse/tree/master/js).
+
+## JSON
+This package is a JSON parser (ECMA-404). It follows the specification at [JSON](http://json.org/). The parser takes an io.Reader and converts it into tokens until the EOF.
+
+[See README here](https://github.com/tdewolff/parse/tree/master/json).
+
+## SVG
+This package contains common hashes for SVG1.1 tags and attributes.
+
+## XML
+This package is an XML1.0 lexer. It follows the specification at [Extensible Markup Language (XML) 1.0 (Fifth Edition)](http://www.w3.org/TR/xml/). The lexer takes an io.Reader and converts it into tokens until the EOF.
+
+[See README here](https://github.com/tdewolff/parse/tree/master/xml).
+
+## License
+Released under the [MIT license](LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/buffer/buffer.go b/vendor/github.com/tdewolff/parse/buffer/buffer.go
new file mode 100644
index 0000000..3094aa9
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/buffer/buffer.go
@@ -0,0 +1,15 @@
+/*
+Package buffer contains buffer and wrapper types for byte slices. It is useful for writing lexers or other high-performance byte slice handling.
+
+The `Reader` and `Writer` types implement the `io.Reader` and `io.Writer` interfaces respectively, and provide a thinner and faster interface than `bytes.Buffer`.
+The `Lexer` type is useful for building lexers because it keeps track of the start and end position of a byte selection, and shifts the bytes whenever a valid token is found.
+The `StreamLexer` does the same, but keeps a buffer pool so that it reads a limited amount at a time, allowing parsing of streaming sources.
+*/
+package buffer // import "github.com/tdewolff/parse/buffer"
+
+// defaultBufSize specifies the default initial length of internal buffers.
+var defaultBufSize = 4096
+
+// MinBuf specifies the default initial length of internal buffers.
+// Solely here to support old versions of parse.
+var MinBuf = defaultBufSize
diff --git a/vendor/github.com/tdewolff/parse/buffer/lexer.go b/vendor/github.com/tdewolff/parse/buffer/lexer.go
new file mode 100644
index 0000000..4bfb35a
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/buffer/lexer.go
@@ -0,0 +1,153 @@
+package buffer // import "github.com/tdewolff/parse/buffer"
+
+import (
+	"io"
+	"io/ioutil"
+)
+
+var nullBuffer = []byte{0}
+
+// Lexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
+// It reads all input into memory up front and appends a NULL byte at the end.
+type Lexer struct {
+	buf   []byte
+	pos   int // index in buf
+	start int // index in buf
+	err   error
+
+	restore func()
+}
+
+// NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
+// If the io.Reader implements Bytes, that is used instead.
+// It will append a NULL at the end of the buffer.
+func NewLexer(r io.Reader) *Lexer {
+	var b []byte
+	if r != nil {
+		if buffer, ok := r.(interface {
+			Bytes() []byte
+		}); ok {
+			b = buffer.Bytes()
+		} else {
+			var err error
+			b, err = ioutil.ReadAll(r)
+			if err != nil {
+				return &Lexer{
+					buf: []byte{0},
+					err: err,
+				}
+			}
+		}
+	}
+	return NewLexerBytes(b)
+}
+
+// NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
+// To avoid reallocation, make sure the capacity has room for one more byte.
+func NewLexerBytes(b []byte) *Lexer {
+	z := &Lexer{
+		buf: b,
+	}
+
+	n := len(b)
+	if n == 0 {
+		z.buf = nullBuffer
+	} else if b[n-1] != 0 {
+		// Append NULL to buffer, but try to avoid reallocation
+		if cap(b) > n {
+			// Overwrite next byte but restore when done
+			b = b[:n+1]
+			c := b[n]
+			b[n] = 0
+
+			z.buf = b
+			z.restore = func() {
+				b[n] = c
+			}
+		} else {
+			z.buf = append(b, 0)
+		}
+	}
+	return z
+}
+
+// Restore restores the byte past the end of the buffer that was overwritten by the appended NULL.
+func (z *Lexer) Restore() {
+	if z.restore != nil {
+		z.restore()
+		z.restore = nil
+	}
+}
+
+// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
+func (z *Lexer) Err() error {
+	if z.err != nil {
+		return z.err
+	} else if z.pos >= len(z.buf)-1 {
+		return io.EOF
+	}
+	return nil
+}
+
+// Peek returns the ith byte relative to the end position.
+// Peek returns 0 when an error has occurred, Err returns the error.
+func (z *Lexer) Peek(pos int) byte {
+	pos += z.pos
+	return z.buf[pos]
+}
+
+// PeekRune returns the rune and rune length of the ith byte relative to the end position.
+func (z *Lexer) PeekRune(pos int) (rune, int) {
+	// from unicode/utf8
+	c := z.Peek(pos)
+	if c < 0xC0 || z.Peek(pos+1) == 0 {
+		return rune(c), 1
+	} else if c < 0xE0 || z.Peek(pos+2) == 0 {
+		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
+	} else if c < 0xF0 || z.Peek(pos+3) == 0 {
+		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
+	}
+	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
+}
+
+// Move advances the position.
+func (z *Lexer) Move(n int) {
+	z.pos += n
+}
+
+// Pos returns a mark to which the position can be rewound.
+func (z *Lexer) Pos() int {
+	return z.pos - z.start
+}
+
+// Rewind rewinds the position to the given position.
+func (z *Lexer) Rewind(pos int) {
+	z.pos = z.start + pos
+}
+
+// Lexeme returns the bytes of the current selection.
+func (z *Lexer) Lexeme() []byte {
+	return z.buf[z.start:z.pos]
+}
+
+// Skip collapses the position to the end of the selection.
+func (z *Lexer) Skip() { + z.start = z.pos +} + +// Shift returns the bytes of the current selection and collapses the position to the end of the selection. +func (z *Lexer) Shift() []byte { + b := z.buf[z.start:z.pos] + z.start = z.pos + return b +} + +// Offset returns the character position in the buffer. +func (z *Lexer) Offset() int { + return z.pos +} + +// Bytes returns the underlying buffer. +func (z *Lexer) Bytes() []byte { + return z.buf +} diff --git a/vendor/github.com/tdewolff/parse/buffer/lexer_test.go b/vendor/github.com/tdewolff/parse/buffer/lexer_test.go new file mode 100644 index 0000000..1317dd0 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/buffer/lexer_test.go @@ -0,0 +1,91 @@ +package buffer // import "github.com/tdewolff/parse/buffer" + +import ( + "bytes" + "io" + "testing" + + "github.com/tdewolff/test" +) + +func TestLexer(t *testing.T) { + s := `Lorem ipsum dolor sit amet, consectetur adipiscing elit.` + z := NewLexer(bytes.NewBufferString(s)) + + test.T(t, z.err, nil, "buffer has no error") + test.T(t, z.Err(), nil, "buffer is at EOF but must not return EOF until we reach that") + test.That(t, z.Pos() == 0, "buffer must start at position 0") + test.That(t, z.Peek(0) == 'L', "first character must be 'L'") + test.That(t, z.Peek(1) == 'o', "second character must be 'o'") + + z.Move(1) + test.That(t, z.Peek(0) == 'o', "must be 'o' at position 1") + test.That(t, z.Peek(1) == 'r', "must be 'r' at position 1") + z.Rewind(6) + test.That(t, z.Peek(0) == 'i', "must be 'i' at position 6") + test.That(t, z.Peek(1) == 'p', "must be 'p' at position 7") + + test.Bytes(t, z.Lexeme(), []byte("Lorem "), "buffered string must now read 'Lorem ' when at position 6") + test.Bytes(t, z.Shift(), []byte("Lorem "), "shift must return the buffered string") + test.That(t, z.Pos() == 0, "after shifting position must be 0") + test.That(t, z.Peek(0) == 'i', "must be 'i' at position 0 after shifting") + test.That(t, z.Peek(1) == 'p', "must be 'p' at position 1 after shifting") + test.T(t, z.Err(), nil, "error must be nil at this point") + + z.Move(len(s) - len("Lorem ") - 1) + test.T(t, z.Err(), nil, "error must be nil just before the end of the buffer") + z.Skip() + test.That(t, z.Pos() == 0, "after skipping position must be 0") + z.Move(1) + test.T(t, z.Err(), io.EOF, "error must be EOF when past the buffer") + z.Move(-1) + test.T(t, z.Err(), nil, "error must be nil just before the end of the buffer, even when it has been past the buffer") +} + +func TestLexerRunes(t *testing.T) { + z := NewLexer(bytes.NewBufferString("aæ†\U00100000")) + r, n := z.PeekRune(0) + test.That(t, n == 1, "first character must be length 1") + test.That(t, r == 'a', "first character must be rune 'a'") + r, n = z.PeekRune(1) + test.That(t, n == 2, "second character must be length 2") + test.That(t, r == 'æ', "second character must be rune 'æ'") + r, n = z.PeekRune(3) + test.That(t, n == 3, "fourth character must be length 3") + test.That(t, r == '†', "fourth character must be rune '†'") + r, n = z.PeekRune(6) + test.That(t, n == 4, "seventh character must be length 4") + test.That(t, r == '\U00100000', "seventh character must be rune '\U00100000'") +} + +func TestLexerBadRune(t *testing.T) { + z := NewLexer(bytes.NewBufferString("\xF0")) // expect four byte rune + r, n := z.PeekRune(0) + test.T(t, n, 1, "length") + test.T(t, r, rune(0xF0), "rune") +} + +func TestLexerZeroLen(t *testing.T) { + z := NewLexer(test.NewPlainReader(bytes.NewBufferString(""))) + test.That(t, z.Peek(0) == 0, "first character must 
yield error") +} + +func TestLexerEmptyReader(t *testing.T) { + z := NewLexer(test.NewEmptyReader()) + test.That(t, z.Peek(0) == 0, "first character must yield error") + test.T(t, z.Err(), io.EOF, "error must be EOF") + test.That(t, z.Peek(0) == 0, "second peek must also yield error") +} + +func TestLexerErrorReader(t *testing.T) { + z := NewLexer(test.NewErrorReader(0)) + test.That(t, z.Peek(0) == 0, "first character must yield error") + test.T(t, z.Err(), test.ErrPlain, "error must be ErrPlain") + test.That(t, z.Peek(0) == 0, "second peek must also yield error") +} + +func TestLexerBytes(t *testing.T) { + b := []byte{'t', 'e', 's', 't'} + z := NewLexerBytes(b) + test.That(t, z.Peek(4) == 0, "fifth character must yield NULL") +} diff --git a/vendor/github.com/tdewolff/parse/buffer/reader.go b/vendor/github.com/tdewolff/parse/buffer/reader.go new file mode 100644 index 0000000..f1a0925 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/buffer/reader.go @@ -0,0 +1,44 @@ +package buffer // import "github.com/tdewolff/parse/buffer" + +import "io" + +// Reader implements an io.Reader over a byte slice. +type Reader struct { + buf []byte + pos int +} + +// NewReader returns a new Reader for a given byte slice. +func NewReader(buf []byte) *Reader { + return &Reader{ + buf: buf, + } +} + +// Read reads bytes into the given byte slice and returns the number of bytes read and an error if occurred. +func (r *Reader) Read(b []byte) (n int, err error) { + if len(b) == 0 { + return 0, nil + } + if r.pos >= len(r.buf) { + return 0, io.EOF + } + n = copy(b, r.buf[r.pos:]) + r.pos += n + return +} + +// Bytes returns the underlying byte slice. +func (r *Reader) Bytes() []byte { + return r.buf +} + +// Reset resets the position of the read pointer to the beginning of the underlying byte slice. +func (r *Reader) Reset() { + r.pos = 0 +} + +// Len returns the length of the buffer. 
+func (r *Reader) Len() int { + return len(r.buf) +} diff --git a/vendor/github.com/tdewolff/parse/buffer/reader_test.go b/vendor/github.com/tdewolff/parse/buffer/reader_test.go new file mode 100644 index 0000000..73600ec --- /dev/null +++ b/vendor/github.com/tdewolff/parse/buffer/reader_test.go @@ -0,0 +1,49 @@ +package buffer // import "github.com/tdewolff/parse/buffer" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/test" +) + +func TestReader(t *testing.T) { + s := []byte("abcde") + r := NewReader(s) + test.Bytes(t, r.Bytes(), s, "reader must return bytes stored") + + buf := make([]byte, 3) + n, err := r.Read(buf) + test.T(t, err, nil, "error") + test.That(t, n == 3, "first read must read 3 characters") + test.Bytes(t, buf, []byte("abc"), "first read must match 'abc'") + + n, err = r.Read(buf) + test.T(t, err, nil, "error") + test.That(t, n == 2, "second read must read 2 characters") + test.Bytes(t, buf[:n], []byte("de"), "second read must match 'de'") + + n, err = r.Read(buf) + test.T(t, err, io.EOF, "error") + test.That(t, n == 0, "third read must read 0 characters") + + n, err = r.Read(nil) + test.T(t, err, nil, "error") + test.That(t, n == 0, "read to nil buffer must return 0 characters read") + + r.Reset() + n, err = r.Read(buf) + test.T(t, err, nil, "error") + test.That(t, n == 3, "read after reset must read 3 characters") + test.Bytes(t, buf, []byte("abc"), "read after reset must match 'abc'") +} + +func ExampleNewReader() { + r := NewReader([]byte("Lorem ipsum")) + w := &bytes.Buffer{} + io.Copy(w, r) + fmt.Println(w.String()) + // Output: Lorem ipsum +} diff --git a/vendor/github.com/tdewolff/parse/buffer/streamlexer.go b/vendor/github.com/tdewolff/parse/buffer/streamlexer.go new file mode 100644 index 0000000..3ed3282 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/buffer/streamlexer.go @@ -0,0 +1,223 @@ +package buffer // import "github.com/tdewolff/parse/buffer" + +import ( + "io" +) + +type block struct { + buf []byte + next int // index in pool plus one + active bool +} + +type bufferPool struct { + pool []block + head int // index in pool plus one + tail int // index in pool plus one + + pos int // byte pos in tail +} + +func (z *bufferPool) swap(oldBuf []byte, size int) []byte { + // find new buffer that can be reused + swap := -1 + for i := 0; i < len(z.pool); i++ { + if !z.pool[i].active && size <= cap(z.pool[i].buf) { + swap = i + break + } + } + if swap == -1 { // no free buffer found for reuse + if z.tail == 0 && z.pos >= len(oldBuf) && size <= cap(oldBuf) { // but we can reuse the current buffer! + z.pos -= len(oldBuf) + return oldBuf[:0] + } + // allocate new + z.pool = append(z.pool, block{make([]byte, 0, size), 0, true}) + swap = len(z.pool) - 1 + } + + newBuf := z.pool[swap].buf + + // put current buffer into pool + z.pool[swap] = block{oldBuf, 0, true} + if z.head != 0 { + z.pool[z.head-1].next = swap + 1 + } + z.head = swap + 1 + if z.tail == 0 { + z.tail = swap + 1 + } + + return newBuf[:0] +} + +func (z *bufferPool) free(n int) { + z.pos += n + // move the tail over to next buffers + for z.tail != 0 && z.pos >= len(z.pool[z.tail-1].buf) { + z.pos -= len(z.pool[z.tail-1].buf) + newTail := z.pool[z.tail-1].next + z.pool[z.tail-1].active = false // after this, any thread may pick up the inactive buffer, so it can't be used anymore + z.tail = newTail + } + if z.tail == 0 { + z.head = 0 + } +} + +// StreamLexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader. 
+// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
+type StreamLexer struct {
+	r   io.Reader
+	err error
+
+	pool bufferPool
+
+	buf       []byte
+	start     int // index in buf
+	pos       int // index in buf
+	prevStart int
+
+	free int
+}
+
+// NewStreamLexer returns a new StreamLexer for a given io.Reader with a 4kB estimated buffer size.
+// If the io.Reader implements Bytes, that buffer is used instead.
+func NewStreamLexer(r io.Reader) *StreamLexer {
+	return NewStreamLexerSize(r, defaultBufSize)
+}
+
+// NewStreamLexerSize returns a new StreamLexer for a given io.Reader and estimated required buffer size.
+// If the io.Reader implements Bytes, that buffer is used instead.
+func NewStreamLexerSize(r io.Reader, size int) *StreamLexer {
+	// if reader has the bytes in memory already, use that instead
+	if buffer, ok := r.(interface {
+		Bytes() []byte
+	}); ok {
+		return &StreamLexer{
+			err: io.EOF,
+			buf: buffer.Bytes(),
+		}
+	}
+	return &StreamLexer{
+		r:   r,
+		buf: make([]byte, 0, size),
+	}
+}
+
+func (z *StreamLexer) read(pos int) byte {
+	if z.err != nil {
+		return 0
+	}
+
+	// free unused bytes
+	z.pool.free(z.free)
+	z.free = 0
+
+	// get new buffer
+	c := cap(z.buf)
+	p := pos - z.start + 1
+	if 2*p > c { // if the token is larger than half the buffer, increase buffer size
+		c = 2*c + p
+	}
+	d := len(z.buf) - z.start
+	buf := z.pool.swap(z.buf[:z.start], c)
+	copy(buf[:d], z.buf[z.start:]) // copy the left-overs (unfinished token) from the old buffer
+
+	// read in new data for the rest of the buffer
+	var n int
+	for pos-z.start >= d && z.err == nil {
+		n, z.err = z.r.Read(buf[d:cap(buf)])
+		d += n
+	}
+	pos -= z.start
+	z.pos -= z.start
+	z.start, z.buf = 0, buf[:d]
+	if pos >= d {
+		return 0
+	}
+	return z.buf[pos]
+}
+
+// Err returns the error returned from io.Reader. It may still return valid bytes for a while though.
+func (z *StreamLexer) Err() error {
+	if z.err == io.EOF && z.pos < len(z.buf) {
+		return nil
+	}
+	return z.err
+}
+
+// Free frees up bytes of length n from previously shifted tokens.
+// Each call to Shift should at one point be followed by a call to Free with a length returned by ShiftLen.
+func (z *StreamLexer) Free(n int) {
+	z.free += n
+}
+
+// Peek returns the ith byte relative to the end position and possibly does an allocation.
+// Peek returns zero when an error has occurred, Err returns the error.
+// TODO: inline function
+func (z *StreamLexer) Peek(pos int) byte {
+	pos += z.pos
+	if uint(pos) < uint(len(z.buf)) { // uint for BCE
+		return z.buf[pos]
+	}
+	return z.read(pos)
+}
+
+// PeekRune returns the rune and rune length of the ith byte relative to the end position.
+func (z *StreamLexer) PeekRune(pos int) (rune, int) {
+	// from unicode/utf8
+	c := z.Peek(pos)
+	if c < 0xC0 {
+		return rune(c), 1
+	} else if c < 0xE0 {
+		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
+	} else if c < 0xF0 {
+		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
+	}
+	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
+}
+
+// Move advances the position.
+func (z *StreamLexer) Move(n int) {
+	z.pos += n
+}
+
+// Pos returns a mark to which the position can be rewound.
+func (z *StreamLexer) Pos() int {
+	return z.pos - z.start
+}
+
+// Rewind rewinds the position to the given position.
+func (z *StreamLexer) Rewind(pos int) {
+	z.pos = z.start + pos
+}
+
+// Lexeme returns the bytes of the current selection.
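+//
+// A usage sketch:
+//
+//	z.Move(5)
+//	data := z.Lexeme() // the five bytes moved over; the selection is not collapsed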
+func (z *StreamLexer) Lexeme() []byte {
+	return z.buf[z.start:z.pos]
+}
+
+// Skip collapses the position to the end of the selection.
+func (z *StreamLexer) Skip() {
+	z.start = z.pos
+}
+
+// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
+// Use ShiftLen to learn how many bytes have been moved since the last call, which can then be passed to Free.
+func (z *StreamLexer) Shift() []byte {
+	if z.pos > len(z.buf) { // make sure we peeked at least as much as we shift
+		z.read(z.pos - 1)
+	}
+	b := z.buf[z.start:z.pos]
+	z.start = z.pos
+	return b
+}
+
+// ShiftLen returns the number of bytes moved since the last call to ShiftLen. This can be used in calls to Free because it takes into account multiple Shifts or Skips.
+func (z *StreamLexer) ShiftLen() int {
+	n := z.start - z.prevStart
+	z.prevStart = z.start
+	return n
+}
diff --git a/vendor/github.com/tdewolff/parse/buffer/streamlexer_test.go b/vendor/github.com/tdewolff/parse/buffer/streamlexer_test.go
new file mode 100644
index 0000000..d37b0c1
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/buffer/streamlexer_test.go
@@ -0,0 +1,148 @@
+package buffer // import "github.com/tdewolff/parse/buffer"
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestBufferPool(t *testing.T) {
+	z := &bufferPool{}
+
+	lorem := []byte("Lorem ipsum")
+	dolor := []byte("dolor sit amet")
+	consectetur := []byte("consectetur adipiscing elit")
+
+	// set lorem as first buffer and get new dolor buffer
+	b := z.swap(lorem, len(dolor))
+	test.That(t, len(b) == 0)
+	test.That(t, cap(b) == len(dolor))
+	b = append(b, dolor...)
+
+	// free first buffer so it will be reused
+	z.free(len(lorem))
+	b = z.swap(b, len(lorem))
+	b = b[:len(lorem)]
+	test.Bytes(t, b, lorem)
+
+	b = z.swap(b, len(consectetur))
+	b = append(b, consectetur...)
+
+	// free in advance to reuse the same buffer
+	z.free(len(dolor) + len(lorem) + len(consectetur))
+	test.That(t, z.head == 0)
+	b = z.swap(b, len(consectetur))
+	b = b[:len(consectetur)]
+	test.Bytes(t, b, consectetur)
+
+	// free in advance but request larger buffer
+	z.free(len(consectetur))
+	b = z.swap(b, len(consectetur)+1)
+	b = append(b, consectetur...)
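+	// the extra dot below fits without reallocation because the swapped-in buffer was requested one byte larger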
+ b = append(b, '.') + test.That(t, cap(b) == len(consectetur)+1) +} + +func TestStreamLexer(t *testing.T) { + s := `Lorem ipsum dolor sit amet, consectetur adipiscing elit.` + z := NewStreamLexer(bytes.NewBufferString(s)) + + test.T(t, z.err, io.EOF, "buffer must be fully in memory") + test.T(t, z.Err(), nil, "buffer is at EOF but must not return EOF until we reach that") + test.That(t, z.Pos() == 0, "buffer must start at position 0") + test.That(t, z.Peek(0) == 'L', "first character must be 'L'") + test.That(t, z.Peek(1) == 'o', "second character must be 'o'") + + z.Move(1) + test.That(t, z.Peek(0) == 'o', "must be 'o' at position 1") + test.That(t, z.Peek(1) == 'r', "must be 'r' at position 1") + z.Rewind(6) + test.That(t, z.Peek(0) == 'i', "must be 'i' at position 6") + test.That(t, z.Peek(1) == 'p', "must be 'p' at position 7") + + test.Bytes(t, z.Lexeme(), []byte("Lorem "), "buffered string must now read 'Lorem ' when at position 6") + test.Bytes(t, z.Shift(), []byte("Lorem "), "shift must return the buffered string") + test.That(t, z.ShiftLen() == len("Lorem "), "shifted length must equal last shift") + test.That(t, z.Pos() == 0, "after shifting position must be 0") + test.That(t, z.Peek(0) == 'i', "must be 'i' at position 0 after shifting") + test.That(t, z.Peek(1) == 'p', "must be 'p' at position 1 after shifting") + test.T(t, z.Err(), nil, "error must be nil at this point") + + z.Move(len(s) - len("Lorem ") - 1) + test.T(t, z.Err(), nil, "error must be nil just before the end of the buffer") + z.Skip() + test.That(t, z.Pos() == 0, "after skipping position must be 0") + z.Move(1) + test.T(t, z.Err(), io.EOF, "error must be EOF when past the buffer") + z.Move(-1) + test.T(t, z.Err(), nil, "error must be nil just before the end of the buffer, even when it has been past the buffer") + z.Free(0) // has already been tested +} + +func TestStreamLexerShift(t *testing.T) { + s := `Lorem ipsum dolor sit amet, consectetur adipiscing elit.` + z := NewStreamLexerSize(test.NewPlainReader(bytes.NewBufferString(s)), 5) + + z.Move(len("Lorem ")) + test.Bytes(t, z.Shift(), []byte("Lorem "), "shift must return the buffered string") + test.That(t, z.ShiftLen() == len("Lorem "), "shifted length must equal last shift") +} + +func TestStreamLexerSmall(t *testing.T) { + s := `abcdefghijklm` + z := NewStreamLexerSize(test.NewPlainReader(bytes.NewBufferString(s)), 4) + test.That(t, z.Peek(8) == 'i', "first character must be 'i' at position 8") + + z = NewStreamLexerSize(test.NewPlainReader(bytes.NewBufferString(s)), 4) + test.That(t, z.Peek(12) == 'm', "first character must be 'm' at position 12") + + z = NewStreamLexerSize(test.NewPlainReader(bytes.NewBufferString(s)), 0) + test.That(t, z.Peek(4) == 'e', "first character must be 'e' at position 4") + + z = NewStreamLexerSize(test.NewPlainReader(bytes.NewBufferString(s)), 13) + test.That(t, z.Peek(13) == 0, "must yield error at position 13") +} + +func TestStreamLexerSingle(t *testing.T) { + z := NewStreamLexer(test.NewInfiniteReader()) + test.That(t, z.Peek(0) == '.') + test.That(t, z.Peek(1) == '.') + test.That(t, z.Peek(3) == '.', "required two successful reads") +} + +func TestStreamLexerRunes(t *testing.T) { + z := NewStreamLexer(bytes.NewBufferString("aæ†\U00100000")) + r, n := z.PeekRune(0) + test.That(t, n == 1, "first character must be length 1") + test.That(t, r == 'a', "first character must be rune 'a'") + r, n = z.PeekRune(1) + test.That(t, n == 2, "second character must be length 2") + test.That(t, r == 'æ', "second character must be rune 
'æ'") + r, n = z.PeekRune(3) + test.That(t, n == 3, "fourth character must be length 3") + test.That(t, r == '†', "fourth character must be rune '†'") + r, n = z.PeekRune(6) + test.That(t, n == 4, "seventh character must be length 4") + test.That(t, r == '\U00100000', "seventh character must be rune '\U00100000'") +} + +func TestStreamLexerBadRune(t *testing.T) { + z := NewStreamLexer(bytes.NewBufferString("\xF0")) // expect four byte rune + r, n := z.PeekRune(0) + test.T(t, n, 4, "length") + test.T(t, r, rune(0), "rune") +} + +func TestStreamLexerZeroLen(t *testing.T) { + z := NewStreamLexer(test.NewPlainReader(bytes.NewBufferString(""))) + test.That(t, z.Peek(0) == 0, "first character must yield error") +} + +func TestStreamLexerEmptyReader(t *testing.T) { + z := NewStreamLexer(test.NewEmptyReader()) + test.That(t, z.Peek(0) == 0, "first character must yield error") + test.T(t, z.Err(), io.EOF, "error must be EOF") + test.That(t, z.Peek(0) == 0, "second peek must also yield error") +} diff --git a/vendor/github.com/tdewolff/parse/buffer/writer.go b/vendor/github.com/tdewolff/parse/buffer/writer.go new file mode 100644 index 0000000..ded2f36 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/buffer/writer.go @@ -0,0 +1,41 @@ +package buffer // import "github.com/tdewolff/parse/buffer" + +// Writer implements an io.Writer over a byte slice. +type Writer struct { + buf []byte +} + +// NewWriter returns a new Writer for a given byte slice. +func NewWriter(buf []byte) *Writer { + return &Writer{ + buf: buf, + } +} + +// Write writes bytes from the given byte slice and returns the number of bytes written and an error if occurred. When err != nil, n == 0. +func (w *Writer) Write(b []byte) (int, error) { + n := len(b) + end := len(w.buf) + if end+n > cap(w.buf) { + buf := make([]byte, end, 2*cap(w.buf)+n) + copy(buf, w.buf) + w.buf = buf + } + w.buf = w.buf[:end+n] + return copy(w.buf[end:], b), nil +} + +// Len returns the length of the underlying byte slice. +func (w *Writer) Len() int { + return len(w.buf) +} + +// Bytes returns the underlying byte slice. +func (w *Writer) Bytes() []byte { + return w.buf +} + +// Reset empties and reuses the current buffer. Subsequent writes will overwrite the buffer, so any reference to the underlying slice is invalidated after this call. 
+func (w *Writer) Reset() {
+	w.buf = w.buf[:0]
+}
diff --git a/vendor/github.com/tdewolff/parse/buffer/writer_test.go b/vendor/github.com/tdewolff/parse/buffer/writer_test.go
new file mode 100644
index 0000000..f56f5f7
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/buffer/writer_test.go
@@ -0,0 +1,46 @@
+package buffer // import "github.com/tdewolff/parse/buffer"
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestWriter(t *testing.T) {
+	w := NewWriter(make([]byte, 0, 3))
+
+	test.That(t, w.Len() == 0, "buffer must initially have zero length")
+
+	n, _ := w.Write([]byte("abc"))
+	test.That(t, n == 3, "first write must write 3 characters")
+	test.Bytes(t, w.Bytes(), []byte("abc"), "first write must match 'abc'")
+	test.That(t, w.Len() == 3, "buffer must have length 3 after first write")
+
+	n, _ = w.Write([]byte("def"))
+	test.That(t, n == 3, "second write must write 3 characters")
+	test.Bytes(t, w.Bytes(), []byte("abcdef"), "second write must match 'abcdef'")
+
+	w.Reset()
+	test.Bytes(t, w.Bytes(), []byte(""), "reset must match ''")
+
+	n, _ = w.Write([]byte("ghijkl"))
+	test.That(t, n == 6, "third write must write 6 characters")
+	test.Bytes(t, w.Bytes(), []byte("ghijkl"), "third write must match 'ghijkl'")
+}
+
+func ExampleNewWriter() {
+	w := NewWriter(make([]byte, 0, 11)) // initial buffer capacity is 11
+	w.Write([]byte("Lorem ipsum"))
+	fmt.Println(string(w.Bytes()))
+	// Output: Lorem ipsum
+}
+
+func ExampleWriter_Reset() {
+	w := NewWriter(make([]byte, 0, 11))                  // initial buffer capacity is 11
+	w.Write([]byte("garbage that will be overwritten")) // does reallocation
+	w.Reset()
+	w.Write([]byte("Lorem ipsum"))
+	fmt.Println(string(w.Bytes()))
+	// Output: Lorem ipsum
+}
diff --git a/vendor/github.com/tdewolff/parse/common.go b/vendor/github.com/tdewolff/parse/common.go
new file mode 100644
index 0000000..3b7158e
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/common.go
@@ -0,0 +1,231 @@
+// Package parse contains a collection of parsers for various formats in its subpackages.
+package parse // import "github.com/tdewolff/parse"
+
+import (
+	"bytes"
+	"encoding/base64"
+	"errors"
+	"net/url"
+)
+
+// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
+var ErrBadDataURI = errors.New("not a data URI")
+
+// Number returns the number of bytes that parse as a number of the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
+func Number(b []byte) int {
+	if len(b) == 0 {
+		return 0
+	}
+	i := 0
+	if b[i] == '+' || b[i] == '-' {
+		i++
+		if i >= len(b) {
+			return 0
+		}
+	}
+	firstDigit := (b[i] >= '0' && b[i] <= '9')
+	if firstDigit {
+		i++
+		for i < len(b) && b[i] >= '0' && b[i] <= '9' {
+			i++
+		}
+	}
+	if i < len(b) && b[i] == '.' {
+		i++
+		if i < len(b) && b[i] >= '0' && b[i] <= '9' {
+			i++
+			for i < len(b) && b[i] >= '0' && b[i] <= '9' {
+				i++
+			}
+		} else if firstDigit {
+			// . could belong to the next token
+			i--
+			return i
+		} else {
+			return 0
+		}
+	} else if !firstDigit {
+		return 0
+	}
+	iOld := i
+	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
+		i++
+		if i < len(b) && (b[i] == '+' || b[i] == '-') {
+			i++
+		}
+		if i >= len(b) || b[i] < '0' || b[i] > '9' {
+			// e could belong to next token
+			return iOld
+		}
+		for i < len(b) && b[i] >= '0' && b[i] <= '9' {
+			i++
+		}
+	}
+	return i
+}
+
+// Dimension parses a byte slice and returns the byte length of the number and the byte length of its unit.
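+// For example (values taken from the package's tests):
+//
+//	Dimension([]byte("5px")) // returns 1, 2: the number "5" and the unit "px"
+//	Dimension([]byte("5%"))  // returns 1, 1: the number "5" and the unit "%"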
+func Dimension(b []byte) (int, int) {
+	num := Number(b)
+	if num == 0 || num == len(b) {
+		return num, 0
+	} else if b[num] == '%' {
+		return num, 1
+	} else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' {
+		i := num + 1
+		for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') {
+			i++
+		}
+		return num, i - num
+	}
+	return num, 0
+}
+
+// Mediatype parses a given mediatype and splits the mimetype from the parameters.
+// It works similarly to mime.ParseMediaType but is faster.
+func Mediatype(b []byte) ([]byte, map[string]string) {
+	i := 0
+	for i < len(b) && b[i] == ' ' {
+		i++
+	}
+	b = b[i:]
+	n := len(b)
+	mimetype := b
+	var params map[string]string
+	for i := 3; i < n; i++ { // mimetype is at least three characters long
+		if b[i] == ';' || b[i] == ' ' {
+			mimetype = b[:i]
+			if b[i] == ' ' {
+				i++
+				for i < n && b[i] == ' ' {
+					i++
+				}
+				if i < n && b[i] != ';' {
+					break
+				}
+			}
+			params = map[string]string{}
+			s := string(b)
+		PARAM:
+			i++
+			for i < n && s[i] == ' ' {
+				i++
+			}
+			start := i
+			for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
+				i++
+			}
+			key := s[start:i]
+			for i < n && s[i] == ' ' {
+				i++
+			}
+			if i < n && s[i] == '=' {
+				i++
+				for i < n && s[i] == ' ' {
+					i++
+				}
+				start = i
+				for i < n && s[i] != ';' && s[i] != ' ' {
+					i++
+				}
+			} else {
+				start = i
+			}
+			params[key] = s[start:i]
+			for i < n && s[i] == ' ' {
+				i++
+			}
+			if i < n && s[i] == ';' {
+				goto PARAM
+			}
+			break
+		}
+	}
+	return mimetype, params
+}
+
+// DataURI parses the given data URI and returns the mediatype, the decoded data and an error.
+func DataURI(dataURI []byte) ([]byte, []byte, error) {
+	if len(dataURI) > 5 && bytes.Equal(dataURI[:5], []byte("data:")) {
+		dataURI = dataURI[5:]
+		inBase64 := false
+		var mediatype []byte
+		i := 0
+		for j := 0; j < len(dataURI); j++ {
+			c := dataURI[j]
+			if c == '=' || c == ';' || c == ',' {
+				if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), []byte("base64")) {
+					if len(mediatype) > 0 {
+						mediatype = mediatype[:len(mediatype)-1]
+					}
+					inBase64 = true
+					i = j
+				} else if c != ',' {
+					mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
+					i = j + 1
+				} else {
+					mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
+				}
+				if c == ',' {
+					if len(mediatype) == 0 || mediatype[0] == ';' {
+						mediatype = []byte("text/plain")
+					}
+					data := dataURI[j+1:]
+					if inBase64 {
+						decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
+						n, err := base64.StdEncoding.Decode(decoded, data)
+						if err != nil {
+							return nil, nil, err
+						}
+						data = decoded[:n]
+					} else if unescaped, err := url.QueryUnescape(string(data)); err == nil {
+						data = []byte(unescaped)
+					}
+					return mediatype, data, nil
+				}
+			}
+		}
+	}
+	return nil, nil, ErrBadDataURI
+}
+
+// QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
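+// For example (values taken from the package's tests):
+//
+//	QuoteEntity([]byte("&#34;"))  // returns '"', 5
+//	QuoteEntity([]byte("&quot;")) // returns '"', 6
+//	QuoteEntity([]byte("&gt;"))   // returns 0, 0: not a quote entity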
+func QuoteEntity(b []byte) (quote byte, n int) {
+	if len(b) < 5 || b[0] != '&' {
+		return 0, 0
+	}
+	if b[1] == '#' {
+		if b[2] == 'x' {
+			i := 3
+			for i < len(b) && b[i] == '0' {
+				i++
+			}
+			if i+2 < len(b) && b[i] == '2' && b[i+2] == ';' {
+				if b[i+1] == '2' {
+					return '"', i + 3 // &#x22;
+				} else if b[i+1] == '7' {
+					return '\'', i + 3 // &#x27;
+				}
+			}
+		} else {
+			i := 2
+			for i < len(b) && b[i] == '0' {
+				i++
+			}
+			if i+2 < len(b) && b[i] == '3' && b[i+2] == ';' {
+				if b[i+1] == '4' {
+					return '"', i + 3 // &#34;
+				} else if b[i+1] == '9' {
+					return '\'', i + 3 // &#39;
+				}
+			}
+		}
+	} else if len(b) >= 6 && b[5] == ';' {
+		if EqualFold(b[1:5], []byte{'q', 'u', 'o', 't'}) {
+			return '"', 6 // &quot;
+		} else if EqualFold(b[1:5], []byte{'a', 'p', 'o', 's'}) {
+			return '\'', 6 // &apos;
+		}
+	}
+	return 0, 0
+}
diff --git a/vendor/github.com/tdewolff/parse/common_test.go b/vendor/github.com/tdewolff/parse/common_test.go
new file mode 100644
index 0000000..a3dbcf4
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/common_test.go
@@ -0,0 +1,172 @@
+package parse // import "github.com/tdewolff/parse"
+
+import (
+	"encoding/base64"
+	"mime"
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestParseNumber(t *testing.T) {
+	var numberTests = []struct {
+		number   string
+		expected int
+	}{
+		{"5", 1},
+		{"0.51", 4},
+		{"0.5e-99", 7},
+		{"0.5e-", 3},
+		{"+50.0", 5},
+		{".0", 2},
+		{"0.", 1},
+		{"", 0},
+		{"+", 0},
+		{".", 0},
+		{"a", 0},
+	}
+	for _, tt := range numberTests {
+		t.Run(tt.number, func(t *testing.T) {
+			n := Number([]byte(tt.number))
+			test.T(t, n, tt.expected)
+		})
+	}
+}
+
+func TestParseDimension(t *testing.T) {
+	var dimensionTests = []struct {
+		dimension    string
+		expectedNum  int
+		expectedUnit int
+	}{
+		{"5px", 1, 2},
+		{"5px ", 1, 2},
+		{"5%", 1, 1},
+		{"5em", 1, 2},
+		{"px", 0, 0},
+		{"1", 1, 0},
+		{"1~", 1, 0},
+	}
+	for _, tt := range dimensionTests {
+		t.Run(tt.dimension, func(t *testing.T) {
+			num, unit := Dimension([]byte(tt.dimension))
+			test.T(t, num, tt.expectedNum, "number")
+			test.T(t, unit, tt.expectedUnit, "unit")
+		})
+	}
+}
+
+func TestMediatype(t *testing.T) {
+	var mediatypeTests = []struct {
+		mediatype        string
+		expectedMimetype string
+		expectedParams   map[string]string
+	}{
+		{"text/plain", "text/plain", nil},
+		{"text/plain;charset=US-ASCII", "text/plain", map[string]string{"charset": "US-ASCII"}},
+		{" text/plain ; charset = US-ASCII ", "text/plain", map[string]string{"charset": "US-ASCII"}},
+		{" text/plain a", "text/plain", nil},
+		{"text/plain;base64", "text/plain", map[string]string{"base64": ""}},
+		{"text/plain;inline=;base64", "text/plain", map[string]string{"inline": "", "base64": ""}},
+	}
+	for _, tt := range mediatypeTests {
+		t.Run(tt.mediatype, func(t *testing.T) {
+			mimetype, _ := Mediatype([]byte(tt.mediatype))
+			test.String(t, string(mimetype), tt.expectedMimetype, "mimetype")
+			//test.T(t, params, tt.expectedParams, "parameters") // TODO
+		})
+	}
+}
+
+func TestParseDataURI(t *testing.T) {
+	var dataURITests = []struct {
+		dataURI          string
+		expectedMimetype string
+		expectedData     string
+		expectedErr      error
+	}{
+		{"www.domain.com", "", "", ErrBadDataURI},
+		{"data:,", "text/plain", "", nil},
+		{"data:text/xml,", "text/xml", "", nil},
+		{"data:,text", "text/plain", "text", nil},
+		{"data:;base64,dGV4dA==", "text/plain", "text", nil},
+		{"data:image/svg+xml,", "image/svg+xml", "", nil},
+		{"data:;base64,()", "", "", base64.CorruptInputError(0)},
+	}
+	for _, tt := range dataURITests {
+		t.Run(tt.dataURI, func(t *testing.T) {
+			mimetype, data, err :=
DataURI([]byte(tt.dataURI))
+			test.T(t, err, tt.expectedErr)
+			test.String(t, string(mimetype), tt.expectedMimetype, "mimetype")
+			test.String(t, string(data), tt.expectedData, "data")
+		})
+	}
+}
+
+func TestParseQuoteEntity(t *testing.T) {
+	var quoteEntityTests = []struct {
+		quoteEntity   string
+		expectedQuote byte
+		expectedN     int
+	}{
+		{"&#34;", '"', 5},
+		{"&#039;", '\'', 6},
+		{"&#x0022;", '"', 8},
+		{"&#x27;", '\'', 6},
+		{"&quot;", '"', 6},
+		{"&apos;", '\'', 6},
+		{"&gt;", 0x00, 0},
+		{"&amp;", 0x00, 0},
+	}
+	for _, tt := range quoteEntityTests {
+		t.Run(tt.quoteEntity, func(t *testing.T) {
+			quote, n := QuoteEntity([]byte(tt.quoteEntity))
+			test.T(t, quote, tt.expectedQuote, "quote")
+			test.T(t, n, tt.expectedN, "quote length")
+		})
+	}
+}
+
+////////////////////////////////////////////////////////////////
+
+func BenchmarkParseMediatypeStd(b *testing.B) {
+	mediatype := "text/plain"
+	for i := 0; i < b.N; i++ {
+		mime.ParseMediaType(mediatype)
+	}
+}
+
+func BenchmarkParseMediatypeParamStd(b *testing.B) {
+	mediatype := "text/plain;inline=1"
+	for i := 0; i < b.N; i++ {
+		mime.ParseMediaType(mediatype)
+	}
+}
+
+func BenchmarkParseMediatypeParamsStd(b *testing.B) {
+	mediatype := "text/plain;charset=US-ASCII;language=US-EN;compression=gzip;base64"
+	for i := 0; i < b.N; i++ {
+		mime.ParseMediaType(mediatype)
+	}
+}
+
+func BenchmarkParseMediatypeParse(b *testing.B) {
+	mediatype := []byte("text/plain")
+	for i := 0; i < b.N; i++ {
+		Mediatype(mediatype)
+	}
+}
+
+func BenchmarkParseMediatypeParamParse(b *testing.B) {
+	mediatype := []byte("text/plain;inline=1")
+	for i := 0; i < b.N; i++ {
+		Mediatype(mediatype)
+	}
+}
+
+func BenchmarkParseMediatypeParamsParse(b *testing.B) {
+	mediatype := []byte("text/plain;charset=US-ASCII;language=US-EN;compression=gzip;base64")
+	for i := 0; i < b.N; i++ {
+		Mediatype(mediatype)
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/css/README.md b/vendor/github.com/tdewolff/parse/css/README.md
new file mode 100644
index 0000000..2013248
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/css/README.md
@@ -0,0 +1,171 @@
+# CSS [![GoDoc](http://godoc.org/github.com/tdewolff/parse/css?status.svg)](http://godoc.org/github.com/tdewolff/parse/css) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/css)](http://gocover.io/github.com/tdewolff/parse/css)
+
+This package is a CSS3 lexer and parser written in [Go][1]. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until the EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to return grammar units until the EOF.
+
+## Installation
+Run the following command
+
+    go get github.com/tdewolff/parse/css
+
+or add the following import and run project with `go get`
+
+    import "github.com/tdewolff/parse/css"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := css.NewLexer(r)
+```
+
+To tokenize until EOF or an error, use:
+``` go
+for {
+	tt, text := l.Next()
+	switch tt {
+	case css.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	// ...
+	}
+}
+```
+
+All tokens (see [CSS Syntax Module Level 3](http://www.w3.org/TR/css3-syntax/)):
+``` go
+ErrorToken // non-official token, returned when errors occur
+IdentToken
+FunctionToken // rgb( rgba( ...
+AtKeywordToken // @abc
+HashToken // #abc
+StringToken
+BadStringToken
+UrlToken // url(
+BadUrlToken
+DelimToken // any unmatched character
+NumberToken // 5
+PercentageToken // 5%
+DimensionToken // 5em
+UnicodeRangeToken
+IncludeMatchToken // ~=
+DashMatchToken // |=
+PrefixMatchToken // ^=
+SuffixMatchToken // $=
+SubstringMatchToken // *=
+ColumnToken // ||
+WhitespaceToken
+CDOToken // <!--
+CDCToken // -->
+ColonToken
+SemicolonToken
+CommaToken
+BracketToken // ( ) [ ] { }, all bracket tokens use this, Data() can distinguish between the brackets
+CommentToken // non-official token
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/css"
+)
+
+// Tokenize CSS3 from stdin.
+func main() {
+	l := css.NewLexer(os.Stdin)
+	for {
+		tt, text := l.Next()
+		switch tt {
+		case css.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error on line", l.Line(), ":", l.Err())
+			}
+			return
+		case css.IdentToken:
+			fmt.Println("Identifier", string(text))
+		case css.NumberToken:
+			fmt.Println("Number", string(text))
+		// ...
+		}
+	}
+}
+```
+
+## Parser
+### Usage
+The following creates a new Parser.
+``` go
+// true because this is the content of an inline style attribute
+p := css.NewParser(bytes.NewBufferString("color: red;"), true)
+```
+
+To iterate over the stylesheet, use:
+``` go
+for {
+	gt, _, data := p.Next()
+	if gt == css.ErrorGrammar {
+		break
+	}
+	// ...
+}
+```
+
+All grammar units returned by `Next`:
+``` go
+ErrorGrammar
+AtRuleGrammar
+BeginAtRuleGrammar
+EndAtRuleGrammar
+BeginRulesetGrammar
+EndRulesetGrammar
+DeclarationGrammar
+TokenGrammar
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tdewolff/parse/css"
+)
+
+func main() {
+	// true because this is the content of an inline style attribute
+	p := css.NewParser(bytes.NewBufferString("color: red;"), true)
+	out := ""
+	for {
+		gt, _, data := p.Next()
+		if gt == css.ErrorGrammar {
+			break
+		} else if gt == css.AtRuleGrammar || gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar || gt == css.DeclarationGrammar {
+			out += string(data)
+			if gt == css.DeclarationGrammar {
+				out += ":"
+			}
+			for _, val := range p.Values() {
+				out += string(val.Data)
+			}
+			if gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar {
+				out += "{"
+			} else if gt == css.AtRuleGrammar || gt == css.DeclarationGrammar {
+				out += ";"
+			}
+		} else {
+			out += string(data)
+		}
+	}
+	fmt.Println(out)
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+ +[1]: http://golang.org/ "Go Language" diff --git a/vendor/github.com/tdewolff/parse/css/hash.go b/vendor/github.com/tdewolff/parse/css/hash.go new file mode 100644 index 0000000..95595c3 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/hash.go @@ -0,0 +1,676 @@ +package css + +// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate + +// uses github.com/tdewolff/hasher +//go:generate hasher -type=Hash -file=hash.go + +// Hash defines perfect hashes for a predefined list of strings +type Hash uint32 + +// Unique hash definitions to be used instead of strings +const ( + Accelerator Hash = 0x47f0b // accelerator + Aliceblue Hash = 0x52509 // aliceblue + Alpha Hash = 0x5af05 // alpha + Antiquewhite Hash = 0x45c0c // antiquewhite + Aquamarine Hash = 0x7020a // aquamarine + Azimuth Hash = 0x5b307 // azimuth + Background Hash = 0xa // background + Background_Attachment Hash = 0x3a15 // background-attachment + Background_Color Hash = 0x11c10 // background-color + Background_Image Hash = 0x99210 // background-image + Background_Position Hash = 0x13 // background-position + Background_Position_X Hash = 0x80815 // background-position-x + Background_Position_Y Hash = 0x15 // background-position-y + Background_Repeat Hash = 0x1511 // background-repeat + Behavior Hash = 0x3108 // behavior + Black Hash = 0x6005 // black + Blanchedalmond Hash = 0x650e // blanchedalmond + Blueviolet Hash = 0x52a0a // blueviolet + Bold Hash = 0x7a04 // bold + Border Hash = 0x8506 // border + Border_Bottom Hash = 0x850d // border-bottom + Border_Bottom_Color Hash = 0x8513 // border-bottom-color + Border_Bottom_Style Hash = 0xbe13 // border-bottom-style + Border_Bottom_Width Hash = 0xe113 // border-bottom-width + Border_Collapse Hash = 0x1020f // border-collapse + Border_Color Hash = 0x1350c // border-color + Border_Left Hash = 0x15c0b // border-left + Border_Left_Color Hash = 0x15c11 // border-left-color + Border_Left_Style Hash = 0x17911 // border-left-style + Border_Left_Width Hash = 0x18a11 // border-left-width + Border_Right Hash = 0x19b0c // border-right + Border_Right_Color Hash = 0x19b12 // border-right-color + Border_Right_Style Hash = 0x1ad12 // border-right-style + Border_Right_Width Hash = 0x1bf12 // border-right-width + Border_Spacing Hash = 0x1d10e // border-spacing + Border_Style Hash = 0x1f40c // border-style + Border_Top Hash = 0x2000a // border-top + Border_Top_Color Hash = 0x20010 // border-top-color + Border_Top_Style Hash = 0x21010 // border-top-style + Border_Top_Width Hash = 0x22010 // border-top-width + Border_Width Hash = 0x2300c // border-width + Bottom Hash = 0x8c06 // bottom + Burlywood Hash = 0x23c09 // burlywood + Cadetblue Hash = 0x25809 // cadetblue + Caption_Side Hash = 0x2610c // caption-side + Charset Hash = 0x44207 // charset + Chartreuse Hash = 0x2730a // chartreuse + Chocolate Hash = 0x27d09 // chocolate + Clear Hash = 0x2ab05 // clear + Clip Hash = 0x2b004 // clip + Color Hash = 0x9305 // color + Content Hash = 0x2e507 // content + Cornflowerblue Hash = 0x2ff0e // cornflowerblue + Cornsilk Hash = 0x30d08 // cornsilk + Counter_Increment Hash = 0x31511 // counter-increment + Counter_Reset Hash = 0x3540d // counter-reset + Cue Hash = 0x36103 // cue + Cue_After Hash = 0x36109 // cue-after + Cue_Before Hash = 0x36a0a // cue-before + Cursive Hash = 0x37b07 // cursive + Cursor Hash = 0x38e06 // cursor + Darkblue Hash = 0x7208 // darkblue + Darkcyan Hash = 0x7d08 // darkcyan + Darkgoldenrod Hash = 0x2440d // 
darkgoldenrod + Darkgray Hash = 0x25008 // darkgray + Darkgreen Hash = 0x79209 // darkgreen + Darkkhaki Hash = 0x88509 // darkkhaki + Darkmagenta Hash = 0x4f40b // darkmagenta + Darkolivegreen Hash = 0x7210e // darkolivegreen + Darkorange Hash = 0x7860a // darkorange + Darkorchid Hash = 0x87c0a // darkorchid + Darksalmon Hash = 0x8c00a // darksalmon + Darkseagreen Hash = 0x9240c // darkseagreen + Darkslateblue Hash = 0x3940d // darkslateblue + Darkslategray Hash = 0x3a10d // darkslategray + Darkturquoise Hash = 0x3ae0d // darkturquoise + Darkviolet Hash = 0x3bb0a // darkviolet + Deeppink Hash = 0x26b08 // deeppink + Deepskyblue Hash = 0x8930b // deepskyblue + Default Hash = 0x57b07 // default + Direction Hash = 0x9f109 // direction + Display Hash = 0x3c507 // display + Document Hash = 0x3d308 // document + Dodgerblue Hash = 0x3db0a // dodgerblue + Elevation Hash = 0x4a009 // elevation + Empty_Cells Hash = 0x4c20b // empty-cells + Fantasy Hash = 0x5ce07 // fantasy + Filter Hash = 0x59806 // filter + Firebrick Hash = 0x3e509 // firebrick + Float Hash = 0x3ee05 // float + Floralwhite Hash = 0x3f30b // floralwhite + Font Hash = 0xd804 // font + Font_Face Hash = 0xd809 // font-face + Font_Family Hash = 0x41d0b // font-family + Font_Size Hash = 0x42809 // font-size + Font_Size_Adjust Hash = 0x42810 // font-size-adjust + Font_Stretch Hash = 0x4380c // font-stretch + Font_Style Hash = 0x4490a // font-style + Font_Variant Hash = 0x4530c // font-variant + Font_Weight Hash = 0x46e0b // font-weight + Forestgreen Hash = 0x3700b // forestgreen + Fuchsia Hash = 0x47907 // fuchsia + Gainsboro Hash = 0x14c09 // gainsboro + Ghostwhite Hash = 0x1de0a // ghostwhite + Goldenrod Hash = 0x24809 // goldenrod + Greenyellow Hash = 0x7960b // greenyellow + Height Hash = 0x68506 // height + Honeydew Hash = 0x5b908 // honeydew + Hsl Hash = 0xf303 // hsl + Hsla Hash = 0xf304 // hsla + Ime_Mode Hash = 0x88d08 // ime-mode + Import Hash = 0x4e306 // import + Important Hash = 0x4e309 // important + Include_Source Hash = 0x7f20e // include-source + Indianred Hash = 0x4ec09 // indianred + Inherit Hash = 0x51907 // inherit + Initial Hash = 0x52007 // initial + Keyframes Hash = 0x40109 // keyframes + Lavender Hash = 0xf508 // lavender + Lavenderblush Hash = 0xf50d // lavenderblush + Lawngreen Hash = 0x4da09 // lawngreen + Layer_Background_Color Hash = 0x11616 // layer-background-color + Layer_Background_Image Hash = 0x98c16 // layer-background-image + Layout_Flow Hash = 0x5030b // layout-flow + Layout_Grid Hash = 0x53f0b // layout-grid + Layout_Grid_Char Hash = 0x53f10 // layout-grid-char + Layout_Grid_Char_Spacing Hash = 0x53f18 // layout-grid-char-spacing + Layout_Grid_Line Hash = 0x55710 // layout-grid-line + Layout_Grid_Mode Hash = 0x56d10 // layout-grid-mode + Layout_Grid_Type Hash = 0x58210 // layout-grid-type + Left Hash = 0x16304 // left + Lemonchiffon Hash = 0xcf0c // lemonchiffon + Letter_Spacing Hash = 0x5310e // letter-spacing + Lightblue Hash = 0x59e09 // lightblue + Lightcoral Hash = 0x5a70a // lightcoral + Lightcyan Hash = 0x5d509 // lightcyan + Lightgoldenrodyellow Hash = 0x5de14 // lightgoldenrodyellow + Lightgray Hash = 0x60509 // lightgray + Lightgreen Hash = 0x60e0a // lightgreen + Lightpink Hash = 0x61809 // lightpink + Lightsalmon Hash = 0x6210b // lightsalmon + Lightseagreen Hash = 0x62c0d // lightseagreen + Lightskyblue Hash = 0x6390c // lightskyblue + Lightslateblue Hash = 0x6450e // lightslateblue + Lightsteelblue Hash = 0x6530e // lightsteelblue + Lightyellow Hash = 0x6610b // lightyellow + Limegreen 
Hash = 0x67709 // limegreen + Line_Break Hash = 0x5630a // line-break + Line_Height Hash = 0x6800b // line-height + List_Style Hash = 0x68b0a // list-style + List_Style_Image Hash = 0x68b10 // list-style-image + List_Style_Position Hash = 0x69b13 // list-style-position + List_Style_Type Hash = 0x6ae0f // list-style-type + Magenta Hash = 0x4f807 // magenta + Margin Hash = 0x2c006 // margin + Margin_Bottom Hash = 0x2c00d // margin-bottom + Margin_Left Hash = 0x2cc0b // margin-left + Margin_Right Hash = 0x3320c // margin-right + Margin_Top Hash = 0x7cd0a // margin-top + Marker_Offset Hash = 0x6bd0d // marker-offset + Marks Hash = 0x6ca05 // marks + Max_Height Hash = 0x6e90a // max-height + Max_Width Hash = 0x6f309 // max-width + Media Hash = 0xa1405 // media + Mediumaquamarine Hash = 0x6fc10 // mediumaquamarine + Mediumblue Hash = 0x70c0a // mediumblue + Mediumorchid Hash = 0x7160c // mediumorchid + Mediumpurple Hash = 0x72f0c // mediumpurple + Mediumseagreen Hash = 0x73b0e // mediumseagreen + Mediumslateblue Hash = 0x7490f // mediumslateblue + Mediumspringgreen Hash = 0x75811 // mediumspringgreen + Mediumturquoise Hash = 0x7690f // mediumturquoise + Mediumvioletred Hash = 0x7780f // mediumvioletred + Midnightblue Hash = 0x7a60c // midnightblue + Min_Height Hash = 0x7b20a // min-height + Min_Width Hash = 0x7bc09 // min-width + Mintcream Hash = 0x7c509 // mintcream + Mistyrose Hash = 0x7e309 // mistyrose + Moccasin Hash = 0x7ec08 // moccasin + Monospace Hash = 0x8c709 // monospace + Namespace Hash = 0x49809 // namespace + Navajowhite Hash = 0x4a80b // navajowhite + None Hash = 0x4bf04 // none + Normal Hash = 0x4d506 // normal + Olivedrab Hash = 0x80009 // olivedrab + Orangered Hash = 0x78a09 // orangered + Orphans Hash = 0x48807 // orphans + Outline Hash = 0x81d07 // outline + Outline_Color Hash = 0x81d0d // outline-color + Outline_Style Hash = 0x82a0d // outline-style + Outline_Width Hash = 0x8370d // outline-width + Overflow Hash = 0x2db08 // overflow + Overflow_X Hash = 0x2db0a // overflow-x + Overflow_Y Hash = 0x8440a // overflow-y + Padding Hash = 0x2b307 // padding + Padding_Bottom Hash = 0x2b30e // padding-bottom + Padding_Left Hash = 0x5f90c // padding-left + Padding_Right Hash = 0x7d60d // padding-right + Padding_Top Hash = 0x8d90b // padding-top + Page Hash = 0x84e04 // page + Page_Break_After Hash = 0x8e310 // page-break-after + Page_Break_Before Hash = 0x84e11 // page-break-before + Page_Break_Inside Hash = 0x85f11 // page-break-inside + Palegoldenrod Hash = 0x8700d // palegoldenrod + Palegreen Hash = 0x89e09 // palegreen + Paleturquoise Hash = 0x8a70d // paleturquoise + Palevioletred Hash = 0x8b40d // palevioletred + Papayawhip Hash = 0x8d00a // papayawhip + Pause Hash = 0x8f305 // pause + Pause_After Hash = 0x8f30b // pause-after + Pause_Before Hash = 0x8fe0c // pause-before + Peachpuff Hash = 0x59009 // peachpuff + Pitch Hash = 0x90a05 // pitch + Pitch_Range Hash = 0x90a0b // pitch-range + Play_During Hash = 0x3c80b // play-during + Position Hash = 0xb08 // position + Powderblue Hash = 0x9150a // powderblue + Progid Hash = 0x91f06 // progid + Quotes Hash = 0x93006 // quotes + Rgb Hash = 0x3803 // rgb + Rgba Hash = 0x3804 // rgba + Richness Hash = 0x9708 // richness + Right Hash = 0x1a205 // right + Rosybrown Hash = 0x15309 // rosybrown + Royalblue Hash = 0xb509 // royalblue + Ruby_Align Hash = 0x12b0a // ruby-align + Ruby_Overhang Hash = 0x1400d // ruby-overhang + Ruby_Position Hash = 0x16c0d // ruby-position + Saddlebrown Hash = 0x48e0b // saddlebrown + Sandybrown Hash = 
0x4cc0a // sandybrown + Sans_Serif Hash = 0x5c50a // sans-serif + Scrollbar_3d_Light_Color Hash = 0x9e18 // scrollbar-3d-light-color + Scrollbar_Arrow_Color Hash = 0x29615 // scrollbar-arrow-color + Scrollbar_Base_Color Hash = 0x40914 // scrollbar-base-color + Scrollbar_Dark_Shadow_Color Hash = 0x6ce1b // scrollbar-dark-shadow-color + Scrollbar_Face_Color Hash = 0x93514 // scrollbar-face-color + Scrollbar_Highlight_Color Hash = 0x9ce19 // scrollbar-highlight-color + Scrollbar_Shadow_Color Hash = 0x94916 // scrollbar-shadow-color + Scrollbar_Track_Color Hash = 0x95f15 // scrollbar-track-color + Seagreen Hash = 0x63108 // seagreen + Seashell Hash = 0x10f08 // seashell + Serif Hash = 0x5ca05 // serif + Size Hash = 0x42d04 // size + Slateblue Hash = 0x39809 // slateblue + Slategray Hash = 0x3a509 // slategray + Speak Hash = 0x97405 // speak + Speak_Header Hash = 0x9740c // speak-header + Speak_Numeral Hash = 0x9800d // speak-numeral + Speak_Punctuation Hash = 0x9a211 // speak-punctuation + Speech_Rate Hash = 0x9b30b // speech-rate + Springgreen Hash = 0x75e0b // springgreen + Steelblue Hash = 0x65809 // steelblue + Stress Hash = 0x29106 // stress + Supports Hash = 0x9c708 // supports + Table_Layout Hash = 0x4fd0c // table-layout + Text_Align Hash = 0x2840a // text-align + Text_Align_Last Hash = 0x2840f // text-align-last + Text_Autospace Hash = 0x1e60e // text-autospace + Text_Decoration Hash = 0x4b10f // text-decoration + Text_Indent Hash = 0x9bc0b // text-indent + Text_Justify Hash = 0x250c // text-justify + Text_Kashida_Space Hash = 0x4e12 // text-kashida-space + Text_Overflow Hash = 0x2d60d // text-overflow + Text_Shadow Hash = 0x2eb0b // text-shadow + Text_Transform Hash = 0x3250e // text-transform + Text_Underline_Position Hash = 0x33d17 // text-underline-position + Top Hash = 0x20703 // top + Turquoise Hash = 0x3b209 // turquoise + Unicode_Bidi Hash = 0x9e70c // unicode-bidi + Vertical_Align Hash = 0x3800e // vertical-align + Visibility Hash = 0x9fa0a // visibility + Voice_Family Hash = 0xa040c // voice-family + Volume Hash = 0xa1006 // volume + White Hash = 0x1e305 // white + White_Space Hash = 0x4630b // white-space + Whitesmoke Hash = 0x3f90a // whitesmoke + Widows Hash = 0x5c006 // widows + Width Hash = 0xef05 // width + Word_Break Hash = 0x2f50a // word-break + Word_Spacing Hash = 0x50d0c // word-spacing + Word_Wrap Hash = 0x5f109 // word-wrap + Writing_Mode Hash = 0x66b0c // writing-mode + Yellow Hash = 0x5ec06 // yellow + Yellowgreen Hash = 0x79b0b // yellowgreen + Z_Index Hash = 0xa1907 // z-index +) + +// String returns the hash' name. +func (i Hash) String() string { + start := uint32(i >> 8) + n := uint32(i & 0xff) + if start+n > uint32(len(_Hash_text)) { + return "" + } + return _Hash_text[start : start+n] +} + +// ToHash returns the hash whose name is s. It returns zero if there is no +// such hash. It is case sensitive. 
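+// For example, ToHash([]byte("font")) returns Font and Font.String() returns "font" again, as exercised by the table test in hash_test.go below.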
+func ToHash(s []byte) Hash { + if len(s) == 0 || len(s) > _Hash_maxLen { + return 0 + } + h := uint32(_Hash_hash0) + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + goto NEXT + } + } + return i + } +NEXT: + if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + return 0 + } + } + return i + } + return 0 +} + +const _Hash_hash0 = 0x700e0976 +const _Hash_maxLen = 27 +const _Hash_text = "background-position-ybackground-repeatext-justifybehaviorgba" + + "ckground-attachmentext-kashida-spaceblackblanchedalmondarkbl" + + "ueboldarkcyanborder-bottom-colorichnesscrollbar-3d-light-col" + + "oroyalblueborder-bottom-stylemonchiffont-faceborder-bottom-w" + + "idthslavenderblushborder-collapseashellayer-background-color" + + "uby-alignborder-coloruby-overhangainsborosybrownborder-left-" + + "coloruby-positionborder-left-styleborder-left-widthborder-ri" + + "ght-colorborder-right-styleborder-right-widthborder-spacingh" + + "ostwhitext-autospaceborder-styleborder-top-colorborder-top-s" + + "tyleborder-top-widthborder-widthburlywoodarkgoldenrodarkgray" + + "cadetbluecaption-sideeppinkchartreusechocolatext-align-lastr" + + "esscrollbar-arrow-colorclearclipadding-bottomargin-bottomarg" + + "in-leftext-overflow-xcontentext-shadoword-breakcornflowerblu" + + "ecornsilkcounter-incrementext-transformargin-rightext-underl" + + "ine-positioncounter-resetcue-aftercue-beforestgreencursivert" + + "ical-aligncursordarkslatebluedarkslategraydarkturquoisedarkv" + + "ioletdisplay-duringdocumentdodgerbluefirebrickfloatfloralwhi" + + "tesmokeyframescrollbar-base-colorfont-familyfont-size-adjust" + + "font-stretcharsetfont-stylefont-variantiquewhite-spacefont-w" + + "eightfuchsiacceleratorphansaddlebrownamespacelevationavajowh" + + "itext-decorationonempty-cellsandybrownormalawngreenimportant" + + "indianredarkmagentable-layout-floword-spacinginheritinitiali" + + "cebluevioletter-spacinglayout-grid-char-spacinglayout-grid-l" + + "ine-breaklayout-grid-modefaultlayout-grid-typeachpuffilterli" + + "ghtbluelightcoralphazimuthoneydewidowsans-serifantasylightcy" + + "anlightgoldenrodyelloword-wrapadding-leftlightgraylightgreen" + + "lightpinklightsalmonlightseagreenlightskybluelightslatebluel" + + "ightsteelbluelightyellowriting-modelimegreenline-heightlist-" + + "style-imagelist-style-positionlist-style-typemarker-offsetma" + + "rkscrollbar-dark-shadow-colormax-heightmax-widthmediumaquama" + + "rinemediumbluemediumorchidarkolivegreenmediumpurplemediumsea" + + "greenmediumslatebluemediumspringgreenmediumturquoisemediumvi" + + "oletredarkorangeredarkgreenyellowgreenmidnightbluemin-height" + + "min-widthmintcreamargin-topadding-rightmistyrosemoccasinclud" + + "e-sourceolivedrabackground-position-xoutline-coloroutline-st" + + "yleoutline-widthoverflow-ypage-break-beforepage-break-inside" + + "palegoldenrodarkorchidarkkhakime-modeepskybluepalegreenpalet" + + "urquoisepalevioletredarksalmonospacepapayawhipadding-topage-" + + "break-afterpause-afterpause-beforepitch-rangepowderblueprogi" + + "darkseagreenquotescrollbar-face-colorscrollbar-shadow-colors" + + "crollbar-track-colorspeak-headerspeak-numeralayer-background" + + "-imagespeak-punctuationspeech-ratext-indentsupportscrollbar-" + + 
"highlight-colorunicode-bidirectionvisibilityvoice-familyvolu" + + "mediaz-index" + +var _Hash_table = [1 << 9]Hash{ + 0x0: 0x4cc0a, // sandybrown + 0x1: 0x20703, // top + 0x4: 0xb509, // royalblue + 0x6: 0x4b10f, // text-decoration + 0xb: 0x5030b, // layout-flow + 0xc: 0x11c10, // background-color + 0xd: 0x8c06, // bottom + 0x10: 0x62c0d, // lightseagreen + 0x11: 0x8930b, // deepskyblue + 0x12: 0x39809, // slateblue + 0x13: 0x4c20b, // empty-cells + 0x14: 0x2b004, // clip + 0x15: 0x70c0a, // mediumblue + 0x16: 0x49809, // namespace + 0x18: 0x2c00d, // margin-bottom + 0x1a: 0x1350c, // border-color + 0x1b: 0x5b908, // honeydew + 0x1d: 0x2300c, // border-width + 0x1e: 0x9740c, // speak-header + 0x1f: 0x8b40d, // palevioletred + 0x20: 0x1d10e, // border-spacing + 0x22: 0x2b307, // padding + 0x23: 0x3320c, // margin-right + 0x27: 0x7bc09, // min-width + 0x29: 0x60509, // lightgray + 0x2a: 0x6610b, // lightyellow + 0x2c: 0x8e310, // page-break-after + 0x2d: 0x2e507, // content + 0x30: 0x250c, // text-justify + 0x32: 0x2840f, // text-align-last + 0x34: 0x93514, // scrollbar-face-color + 0x35: 0x40109, // keyframes + 0x37: 0x4f807, // magenta + 0x38: 0x3a509, // slategray + 0x3a: 0x99210, // background-image + 0x3c: 0x7f20e, // include-source + 0x3d: 0x65809, // steelblue + 0x3e: 0x81d0d, // outline-color + 0x40: 0x1020f, // border-collapse + 0x41: 0xf508, // lavender + 0x42: 0x9c708, // supports + 0x44: 0x6800b, // line-height + 0x45: 0x9a211, // speak-punctuation + 0x46: 0x9fa0a, // visibility + 0x47: 0x2ab05, // clear + 0x4b: 0x52a0a, // blueviolet + 0x4e: 0x57b07, // default + 0x50: 0x6bd0d, // marker-offset + 0x52: 0x31511, // counter-increment + 0x53: 0x6450e, // lightslateblue + 0x54: 0x10f08, // seashell + 0x56: 0x16c0d, // ruby-position + 0x57: 0x82a0d, // outline-style + 0x58: 0x63108, // seagreen + 0x59: 0x9305, // color + 0x5c: 0x2610c, // caption-side + 0x5d: 0x68506, // height + 0x5e: 0x7490f, // mediumslateblue + 0x5f: 0x8fe0c, // pause-before + 0x60: 0xcf0c, // lemonchiffon + 0x63: 0x37b07, // cursive + 0x66: 0x4a80b, // navajowhite + 0x67: 0xa040c, // voice-family + 0x68: 0x2440d, // darkgoldenrod + 0x69: 0x3e509, // firebrick + 0x6a: 0x4490a, // font-style + 0x6b: 0x9f109, // direction + 0x6d: 0x7860a, // darkorange + 0x6f: 0x4530c, // font-variant + 0x70: 0x2c006, // margin + 0x71: 0x84e11, // page-break-before + 0x73: 0x2d60d, // text-overflow + 0x74: 0x4e12, // text-kashida-space + 0x75: 0x30d08, // cornsilk + 0x76: 0x46e0b, // font-weight + 0x77: 0x42d04, // size + 0x78: 0x53f0b, // layout-grid + 0x79: 0x8d90b, // padding-top + 0x7a: 0x44207, // charset + 0x7d: 0x7e309, // mistyrose + 0x7e: 0x5b307, // azimuth + 0x7f: 0x8f30b, // pause-after + 0x84: 0x38e06, // cursor + 0x85: 0xf303, // hsl + 0x86: 0x5310e, // letter-spacing + 0x8b: 0x3d308, // document + 0x8d: 0x36109, // cue-after + 0x8f: 0x36a0a, // cue-before + 0x91: 0x5ce07, // fantasy + 0x94: 0x1400d, // ruby-overhang + 0x95: 0x2b30e, // padding-bottom + 0x9a: 0x59e09, // lightblue + 0x9c: 0x8c00a, // darksalmon + 0x9d: 0x42810, // font-size-adjust + 0x9e: 0x61809, // lightpink + 0xa0: 0x9240c, // darkseagreen + 0xa2: 0x85f11, // page-break-inside + 0xa4: 0x24809, // goldenrod + 0xa6: 0xa1405, // media + 0xa7: 0x53f18, // layout-grid-char-spacing + 0xa9: 0x4e309, // important + 0xaa: 0x7b20a, // min-height + 0xb0: 0x15c11, // border-left-color + 0xb1: 0x84e04, // page + 0xb2: 0x98c16, // layer-background-image + 0xb5: 0x55710, // layout-grid-line + 0xb6: 0x1511, // background-repeat + 0xb7: 0x8513, // 
border-bottom-color + 0xb9: 0x25008, // darkgray + 0xbb: 0x5f90c, // padding-left + 0xbc: 0x1a205, // right + 0xc0: 0x40914, // scrollbar-base-color + 0xc1: 0x6530e, // lightsteelblue + 0xc2: 0xef05, // width + 0xc5: 0x3b209, // turquoise + 0xc8: 0x3ee05, // float + 0xca: 0x12b0a, // ruby-align + 0xcb: 0xb08, // position + 0xcc: 0x7cd0a, // margin-top + 0xce: 0x2cc0b, // margin-left + 0xcf: 0x2eb0b, // text-shadow + 0xd0: 0x2f50a, // word-break + 0xd4: 0x3f90a, // whitesmoke + 0xd6: 0x33d17, // text-underline-position + 0xd7: 0x1bf12, // border-right-width + 0xd8: 0x80009, // olivedrab + 0xd9: 0x89e09, // palegreen + 0xdb: 0x4e306, // import + 0xdc: 0x6ca05, // marks + 0xdd: 0x3bb0a, // darkviolet + 0xde: 0x13, // background-position + 0xe0: 0x6fc10, // mediumaquamarine + 0xe1: 0x7a04, // bold + 0xe2: 0x7690f, // mediumturquoise + 0xe4: 0x8700d, // palegoldenrod + 0xe5: 0x4f40b, // darkmagenta + 0xe6: 0x15309, // rosybrown + 0xe7: 0x18a11, // border-left-width + 0xe8: 0x88509, // darkkhaki + 0xea: 0x650e, // blanchedalmond + 0xeb: 0x52007, // initial + 0xec: 0x6ce1b, // scrollbar-dark-shadow-color + 0xee: 0x48e0b, // saddlebrown + 0xef: 0x8a70d, // paleturquoise + 0xf1: 0x19b12, // border-right-color + 0xf3: 0x1e305, // white + 0xf7: 0x9ce19, // scrollbar-highlight-color + 0xf9: 0x56d10, // layout-grid-mode + 0xfc: 0x1f40c, // border-style + 0xfe: 0x69b13, // list-style-position + 0x100: 0x11616, // layer-background-color + 0x102: 0x58210, // layout-grid-type + 0x103: 0x15c0b, // border-left + 0x104: 0x2db08, // overflow + 0x105: 0x7a60c, // midnightblue + 0x10b: 0x2840a, // text-align + 0x10e: 0x21010, // border-top-style + 0x110: 0x5de14, // lightgoldenrodyellow + 0x114: 0x8506, // border + 0x119: 0xd804, // font + 0x11c: 0x7020a, // aquamarine + 0x11d: 0x60e0a, // lightgreen + 0x11e: 0x5ec06, // yellow + 0x120: 0x97405, // speak + 0x121: 0x4630b, // white-space + 0x123: 0x3940d, // darkslateblue + 0x125: 0x1e60e, // text-autospace + 0x128: 0xf50d, // lavenderblush + 0x12c: 0x6210b, // lightsalmon + 0x12d: 0x51907, // inherit + 0x131: 0x87c0a, // darkorchid + 0x132: 0x2000a, // border-top + 0x133: 0x3c80b, // play-during + 0x137: 0x22010, // border-top-width + 0x139: 0x48807, // orphans + 0x13a: 0x41d0b, // font-family + 0x13d: 0x3db0a, // dodgerblue + 0x13f: 0x8d00a, // papayawhip + 0x140: 0x8f305, // pause + 0x143: 0x2ff0e, // cornflowerblue + 0x144: 0x3c507, // display + 0x146: 0x52509, // aliceblue + 0x14a: 0x7208, // darkblue + 0x14b: 0x3108, // behavior + 0x14c: 0x3540d, // counter-reset + 0x14d: 0x7960b, // greenyellow + 0x14e: 0x75811, // mediumspringgreen + 0x14f: 0x9150a, // powderblue + 0x150: 0x53f10, // layout-grid-char + 0x158: 0x81d07, // outline + 0x159: 0x23c09, // burlywood + 0x15b: 0xe113, // border-bottom-width + 0x15c: 0x4bf04, // none + 0x15e: 0x36103, // cue + 0x15f: 0x4fd0c, // table-layout + 0x160: 0x90a0b, // pitch-range + 0x161: 0xa1907, // z-index + 0x162: 0x29106, // stress + 0x163: 0x80815, // background-position-x + 0x165: 0x4d506, // normal + 0x167: 0x72f0c, // mediumpurple + 0x169: 0x5a70a, // lightcoral + 0x16c: 0x6e90a, // max-height + 0x16d: 0x3804, // rgba + 0x16e: 0x68b10, // list-style-image + 0x170: 0x26b08, // deeppink + 0x173: 0x91f06, // progid + 0x175: 0x75e0b, // springgreen + 0x176: 0x3700b, // forestgreen + 0x179: 0x7ec08, // moccasin + 0x17a: 0x7780f, // mediumvioletred + 0x17e: 0x9bc0b, // text-indent + 0x181: 0x6ae0f, // list-style-type + 0x182: 0x14c09, // gainsboro + 0x183: 0x3ae0d, // darkturquoise + 0x184: 0x3a10d, // darkslategray + 
0x189: 0x2db0a, // overflow-x + 0x18b: 0x93006, // quotes + 0x18c: 0x3a15, // background-attachment + 0x18f: 0x19b0c, // border-right + 0x191: 0x6005, // black + 0x192: 0x79b0b, // yellowgreen + 0x194: 0x59009, // peachpuff + 0x197: 0x3f30b, // floralwhite + 0x19c: 0x7210e, // darkolivegreen + 0x19d: 0x5f109, // word-wrap + 0x19e: 0x17911, // border-left-style + 0x1a0: 0x9b30b, // speech-rate + 0x1a1: 0x8370d, // outline-width + 0x1a2: 0x9e70c, // unicode-bidi + 0x1a3: 0x68b0a, // list-style + 0x1a4: 0x90a05, // pitch + 0x1a5: 0x95f15, // scrollbar-track-color + 0x1a6: 0x47907, // fuchsia + 0x1a8: 0x3800e, // vertical-align + 0x1ad: 0x5af05, // alpha + 0x1ae: 0x6f309, // max-width + 0x1af: 0x9708, // richness + 0x1b0: 0x3803, // rgb + 0x1b1: 0x7d60d, // padding-right + 0x1b2: 0x29615, // scrollbar-arrow-color + 0x1b3: 0x16304, // left + 0x1b5: 0x4a009, // elevation + 0x1b6: 0x5630a, // line-break + 0x1ba: 0x27d09, // chocolate + 0x1bb: 0x9800d, // speak-numeral + 0x1bd: 0x47f0b, // accelerator + 0x1be: 0x67709, // limegreen + 0x1c1: 0x7d08, // darkcyan + 0x1c3: 0x6390c, // lightskyblue + 0x1c5: 0x5c50a, // sans-serif + 0x1c6: 0x850d, // border-bottom + 0x1c7: 0xa, // background + 0x1c8: 0xa1006, // volume + 0x1ca: 0x66b0c, // writing-mode + 0x1cb: 0x9e18, // scrollbar-3d-light-color + 0x1cc: 0x5c006, // widows + 0x1cf: 0x42809, // font-size + 0x1d0: 0x15, // background-position-y + 0x1d1: 0x5d509, // lightcyan + 0x1d4: 0x4ec09, // indianred + 0x1d7: 0x1de0a, // ghostwhite + 0x1db: 0x78a09, // orangered + 0x1dc: 0x45c0c, // antiquewhite + 0x1dd: 0x4da09, // lawngreen + 0x1df: 0x73b0e, // mediumseagreen + 0x1e0: 0x20010, // border-top-color + 0x1e2: 0xf304, // hsla + 0x1e4: 0x3250e, // text-transform + 0x1e6: 0x7160c, // mediumorchid + 0x1e9: 0x8c709, // monospace + 0x1ec: 0x94916, // scrollbar-shadow-color + 0x1ed: 0x79209, // darkgreen + 0x1ef: 0x25809, // cadetblue + 0x1f0: 0x59806, // filter + 0x1f1: 0x1ad12, // border-right-style + 0x1f6: 0x8440a, // overflow-y + 0x1f7: 0xd809, // font-face + 0x1f8: 0x50d0c, // word-spacing + 0x1fa: 0xbe13, // border-bottom-style + 0x1fb: 0x4380c, // font-stretch + 0x1fc: 0x7c509, // mintcream + 0x1fd: 0x88d08, // ime-mode + 0x1fe: 0x2730a, // chartreuse + 0x1ff: 0x5ca05, // serif +} diff --git a/vendor/github.com/tdewolff/parse/css/hash_test.go b/vendor/github.com/tdewolff/parse/css/hash_test.go new file mode 100644 index 0000000..e176cc1 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/hash_test.go @@ -0,0 +1,16 @@ +package css // import "github.com/tdewolff/parse/css" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestHashTable(t *testing.T) { + test.T(t, ToHash([]byte("font")), Font, "'font' must resolve to hash.Font") + test.T(t, Font.String(), "font") + test.T(t, Margin_Left.String(), "margin-left") + test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero") + test.T(t, Hash(0xffffff).String(), "") + test.T(t, ToHash([]byte("fonts")), Hash(0), "'fonts' must resolve to zero") +} diff --git a/vendor/github.com/tdewolff/parse/css/lex.go b/vendor/github.com/tdewolff/parse/css/lex.go new file mode 100644 index 0000000..3924bb7 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/lex.go @@ -0,0 +1,710 @@ +// Package css is a CSS3 lexer and parser following the specifications at http://www.w3.org/TR/css-syntax-3/. 
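+// +// A minimal tokenization loop, sketched after ExampleNewLexer in lex_test.go: +// +// l := NewLexer(bytes.NewBufferString("color: red;")) +// for { +// tt, data := l.Next() +// if tt == ErrorToken { +// break // l.Err() reports io.EOF or the lexing error +// } +// _ = data // process the token +// }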
+package css // import "github.com/tdewolff/parse/css" + +// TODO: \uFFFD replacement character for NULL bytes in strings for example, or at least don't end the string early + +import ( + "bytes" + "io" + "strconv" + + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/buffer" +) + +// TokenType determines the type of token, e.g. a number or a semicolon. +type TokenType uint32 + +// TokenType values. +const ( + ErrorToken TokenType = iota // extra token when errors occur + IdentToken + FunctionToken // rgb( rgba( ... + AtKeywordToken // @abc + HashToken // #abc + StringToken + BadStringToken + URLToken + BadURLToken + DelimToken // any unmatched character + NumberToken // 5 + PercentageToken // 5% + DimensionToken // 5em + UnicodeRangeToken // U+554A + IncludeMatchToken // ~= + DashMatchToken // |= + PrefixMatchToken // ^= + SuffixMatchToken // $= + SubstringMatchToken // *= + ColumnToken // || + WhitespaceToken // space \t \r \n \f + CDOToken // <!-- + CDCToken // --> + ColonToken // : + SemicolonToken // ; + CommaToken // , + LeftBracketToken // [ + RightBracketToken // ] + LeftParenthesisToken // ( + RightParenthesisToken // ) + LeftBraceToken // { + RightBraceToken // } + CommentToken // extra token for comments + EmptyToken + CustomPropertyNameToken + CustomPropertyValueToken +) + +// String returns the string representation of a TokenType. +func (tt TokenType) String() string { + switch tt { + case ErrorToken: + return "Error" + case IdentToken: + return "Ident" + case FunctionToken: + return "Function" + case AtKeywordToken: + return "AtKeyword" + case HashToken: + return "Hash" + case StringToken: + return "String" + case BadStringToken: + return "BadString" + case URLToken: + return "URL" + case BadURLToken: + return "BadURL" + case DelimToken: + return "Delim" + case NumberToken: + return "Number" + case PercentageToken: + return "Percentage" + case DimensionToken: + return "Dimension" + case UnicodeRangeToken: + return "UnicodeRange" + case IncludeMatchToken: + return "IncludeMatch" + case DashMatchToken: + return "DashMatch" + case PrefixMatchToken: + return "PrefixMatch" + case SuffixMatchToken: + return "SuffixMatch" + case SubstringMatchToken: + return "SubstringMatch" + case ColumnToken: + return "Column" + case WhitespaceToken: + return "Whitespace" + case CDOToken: + return "CDO" + case CDCToken: + return "CDC" + case ColonToken: + return "Colon" + case SemicolonToken: + return "Semicolon" + case CommaToken: + return "Comma" + case LeftBracketToken: + return "LeftBracket" + case RightBracketToken: + return "RightBracket" + case LeftParenthesisToken: + return "LeftParenthesis" + case RightParenthesisToken: + return "RightParenthesis" + case LeftBraceToken: + return "LeftBrace" + case RightBraceToken: + return "RightBrace" + case CommentToken: + return "Comment" + case EmptyToken: + return "Empty" + case CustomPropertyNameToken: + return "CustomPropertyName" + case CustomPropertyValueToken: + return "CustomPropertyValue" + } + return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Lexer is the state for the lexer. +type Lexer struct { + r *buffer.Lexer +} + +// NewLexer returns a new Lexer for a given io.Reader. +func NewLexer(r io.Reader) *Lexer { + return &Lexer{ + buffer.NewLexer(r), + } +} + +// Err returns the error encountered during lexing; this is often io.EOF, but other errors can be returned as well.
+func (l *Lexer) Err() error { + return l.r.Err() +} + +// Restore restores the NULL byte at the end of the buffer. +func (l *Lexer) Restore() { + l.r.Restore() +} + +// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. +func (l *Lexer) Next() (TokenType, []byte) { + switch l.r.Peek(0) { + case ' ', '\t', '\n', '\r', '\f': + l.r.Move(1) + for l.consumeWhitespace() { + } + return WhitespaceToken, l.r.Shift() + case ':': + l.r.Move(1) + return ColonToken, l.r.Shift() + case ';': + l.r.Move(1) + return SemicolonToken, l.r.Shift() + case ',': + l.r.Move(1) + return CommaToken, l.r.Shift() + case '(', ')', '[', ']', '{', '}': + if t := l.consumeBracket(); t != ErrorToken { + return t, l.r.Shift() + } + case '#': + if l.consumeHashToken() { + return HashToken, l.r.Shift() + } + case '"', '\'': + if t := l.consumeString(); t != ErrorToken { + return t, l.r.Shift() + } + case '.', '+': + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } + case '-': + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } else if l.consumeCDCToken() { + return CDCToken, l.r.Shift() + } else if l.consumeCustomVariableToken() { + return CustomPropertyNameToken, l.r.Shift() + } + case '@': + if l.consumeAtKeywordToken() { + return AtKeywordToken, l.r.Shift() + } + case '$', '*', '^', '~': + if t := l.consumeMatch(); t != ErrorToken { + return t, l.r.Shift() + } + case '/': + if l.consumeComment() { + return CommentToken, l.r.Shift() + } + case '<': + if l.consumeCDOToken() { + return CDOToken, l.r.Shift() + } + case '\\': + if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + case 'u', 'U': + if l.consumeUnicodeRangeToken() { + return UnicodeRangeToken, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + case '|': + if t := l.consumeMatch(); t != ErrorToken { + return t, l.r.Shift() + } else if l.consumeColumnToken() { + return ColumnToken, l.r.Shift() + } + case 0: + if l.Err() != nil { + return ErrorToken, nil + } + default: + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + } + // can't be rune because consumeIdentlike consumes that as an identifier + l.r.Move(1) + return DelimToken, l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the railroad diagrams in http://www.w3.org/TR/css3-syntax/ +*/ + +func (l *Lexer) consumeByte(c byte) bool { + if l.r.Peek(0) == c { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeComment() bool { + if l.r.Peek(0) != '/' || l.r.Peek(1) != '*' { + return false + } + l.r.Move(2) + for { + c := l.r.Peek(0) + if c == 0 && l.Err() != nil { + break + } else if c == '*' && l.r.Peek(1) == '/' { + l.r.Move(2) + return true + } + l.r.Move(1) + } + return true +} + +func (l *Lexer) consumeNewline() bool { + c := l.r.Peek(0) + if c == '\n' || c == '\f' { + l.r.Move(1) + return true + } else if c == '\r' { + if l.r.Peek(1) == '\n' { + l.r.Move(2) + } else { + l.r.Move(1) + } + return true + } + return false +} + +func (l *Lexer) consumeWhitespace() bool { + c := l.r.Peek(0) + if c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeDigit() 
bool { + c := l.r.Peek(0) + if c >= '0' && c <= '9' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeHexDigit() bool { + c := l.r.Peek(0) + if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeEscape() bool { + if l.r.Peek(0) != '\\' { + return false + } + mark := l.r.Pos() + l.r.Move(1) + if l.consumeNewline() { + l.r.Rewind(mark) + return false + } else if l.consumeHexDigit() { + for k := 1; k < 6; k++ { + if !l.consumeHexDigit() { + break + } + } + l.consumeWhitespace() + return true + } else { + c := l.r.Peek(0) + if c >= 0xC0 { + _, n := l.r.PeekRune(0) + l.r.Move(n) + return true + } else if c == 0 && l.r.Err() != nil { + return true + } + } + l.r.Move(1) + return true +} + +func (l *Lexer) consumeIdentToken() bool { + mark := l.r.Pos() + if l.r.Peek(0) == '-' { + l.r.Move(1) + } + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + l.r.Rewind(mark) + return false + } + } else { + l.r.Move(1) + } + for { + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + break + } + } else { + l.r.Move(1) + } + } + return true +} + +// support custom variables, https://www.w3.org/TR/css-variables-1/ +func (l *Lexer) consumeCustomVariableToken() bool { + // expect to be on a '-' + l.r.Move(1) + if l.r.Peek(0) != '-' { + l.r.Move(-1) + return false + } + if !l.consumeIdentToken() { + l.r.Move(-1) + return false + } + return true +} + +func (l *Lexer) consumeAtKeywordToken() bool { + // expect to be on an '@' + l.r.Move(1) + if !l.consumeIdentToken() { + l.r.Move(-1) + return false + } + return true +} + +func (l *Lexer) consumeHashToken() bool { + // expect to be on a '#' + mark := l.r.Pos() + l.r.Move(1) + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + l.r.Rewind(mark) + return false + } + } else { + l.r.Move(1) + } + for { + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + break + } + } else { + l.r.Move(1) + } + } + return true +} + +func (l *Lexer) consumeNumberToken() bool { + mark := l.r.Pos() + c := l.r.Peek(0) + if c == '+' || c == '-' { + l.r.Move(1) + } + firstDigit := l.consumeDigit() + if firstDigit { + for l.consumeDigit() { + } + } + if l.r.Peek(0) == '.' { + l.r.Move(1) + if l.consumeDigit() { + for l.consumeDigit() { + } + } else if firstDigit { + // . 
could belong to the next token + l.r.Move(-1) + return true + } else { + l.r.Rewind(mark) + return false + } + } else if !firstDigit { + l.r.Rewind(mark) + return false + } + mark = l.r.Pos() + c = l.r.Peek(0) + if c == 'e' || c == 'E' { + l.r.Move(1) + c = l.r.Peek(0) + if c == '+' || c == '-' { + l.r.Move(1) + } + if !l.consumeDigit() { + // e could belong to next token + l.r.Rewind(mark) + return true + } + for l.consumeDigit() { + } + } + return true +} + +func (l *Lexer) consumeUnicodeRangeToken() bool { + c := l.r.Peek(0) + if (c != 'u' && c != 'U') || l.r.Peek(1) != '+' { + return false + } + mark := l.r.Pos() + l.r.Move(2) + if l.consumeHexDigit() { + // consume up to 6 hexDigits + k := 1 + for ; k < 6; k++ { + if !l.consumeHexDigit() { + break + } + } + + // either a minus or a question mark or the end is expected + if l.consumeByte('-') { + // consume another up to 6 hexDigits + if l.consumeHexDigit() { + for k := 1; k < 6; k++ { + if !l.consumeHexDigit() { + break + } + } + } else { + l.r.Rewind(mark) + return false + } + } else { + // could be filled up to 6 characters with question marks or else regular hexDigits + if l.consumeByte('?') { + k++ + for ; k < 6; k++ { + if !l.consumeByte('?') { + l.r.Rewind(mark) + return false + } + } + } + } + } else { + // consume 6 question marks + for k := 0; k < 6; k++ { + if !l.consumeByte('?') { + l.r.Rewind(mark) + return false + } + } + } + return true +} + +func (l *Lexer) consumeColumnToken() bool { + if l.r.Peek(0) == '|' && l.r.Peek(1) == '|' { + l.r.Move(2) + return true + } + return false +} + +func (l *Lexer) consumeCDOToken() bool { + if l.r.Peek(0) == '<' && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { + l.r.Move(4) + return true + } + return false +} + +func (l *Lexer) consumeCDCToken() bool { + if l.r.Peek(0) == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + l.r.Move(3) + return true + } + return false +} + +//////////////////////////////////////////////////////////////// + +// consumeMatch consumes any MatchToken. +func (l *Lexer) consumeMatch() TokenType { + if l.r.Peek(1) == '=' { + switch l.r.Peek(0) { + case '~': + l.r.Move(2) + return IncludeMatchToken + case '|': + l.r.Move(2) + return DashMatchToken + case '^': + l.r.Move(2) + return PrefixMatchToken + case '$': + l.r.Move(2) + return SuffixMatchToken + case '*': + l.r.Move(2) + return SubstringMatchToken + } + } + return ErrorToken +} + +// consumeBracket consumes any bracket token. +func (l *Lexer) consumeBracket() TokenType { + switch l.r.Peek(0) { + case '(': + l.r.Move(1) + return LeftParenthesisToken + case ')': + l.r.Move(1) + return RightParenthesisToken + case '[': + l.r.Move(1) + return LeftBracketToken + case ']': + l.r.Move(1) + return RightBracketToken + case '{': + l.r.Move(1) + return LeftBraceToken + case '}': + l.r.Move(1) + return RightBraceToken + } + return ErrorToken +} + +// consumeNumeric consumes NumberToken, PercentageToken or DimensionToken. +func (l *Lexer) consumeNumeric() TokenType { + if l.consumeNumberToken() { + if l.consumeByte('%') { + return PercentageToken + } else if l.consumeIdentToken() { + return DimensionToken + } + return NumberToken + } + return ErrorToken +} + +// consumeString consumes a string and may return BadStringToken when a newline is encountered. 
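+// For example (per the cases in lex_test.go), an unterminated "abcd at EOF still yields StringToken, whereas a string broken by a raw newline yields BadStringToken.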
+func (l *Lexer) consumeString() TokenType { + // assume to be on " or ' + delim := l.r.Peek(0) + l.r.Move(1) + for { + c := l.r.Peek(0) + if c == 0 && l.Err() != nil { + break + } else if c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + return BadStringToken + } else if c == delim { + l.r.Move(1) + break + } else if c == '\\' { + if !l.consumeEscape() { + l.r.Move(1) + l.consumeNewline() + } + } else { + l.r.Move(1) + } + } + return StringToken +} + +func (l *Lexer) consumeUnquotedURL() bool { + for { + c := l.r.Peek(0) + if c == 0 && l.Err() != nil || c == ')' { + break + } else if c == '"' || c == '\'' || c == '(' || c == '\\' || c == ' ' || c <= 0x1F || c == 0x7F { + if c != '\\' || !l.consumeEscape() { + return false + } + } else { + l.r.Move(1) + } + } + return true +} + +// consumeRemnantsBadUrl consumes bytes of a BadUrlToken so that normal tokenization may continue. +func (l *Lexer) consumeRemnantsBadURL() { + for { + if l.consumeByte(')') || l.Err() != nil { + break + } else if !l.consumeEscape() { + l.r.Move(1) + } + } +} + +// consumeIdentlike consumes IdentToken, FunctionToken or UrlToken. +func (l *Lexer) consumeIdentlike() TokenType { + if l.consumeIdentToken() { + if l.r.Peek(0) != '(' { + return IdentToken + } else if !parse.EqualFold(bytes.Replace(l.r.Lexeme(), []byte{'\\'}, nil, -1), []byte{'u', 'r', 'l'}) { + l.r.Move(1) + return FunctionToken + } + l.r.Move(1) + + // consume url + for l.consumeWhitespace() { + } + if c := l.r.Peek(0); c == '"' || c == '\'' { + if l.consumeString() == BadStringToken { + l.consumeRemnantsBadURL() + return BadURLToken + } + } else if !l.consumeUnquotedURL() && !l.consumeWhitespace() { + l.consumeRemnantsBadURL() + return BadURLToken + } + for l.consumeWhitespace() { + } + if !l.consumeByte(')') && l.Err() != io.EOF { + l.consumeRemnantsBadURL() + return BadURLToken + } + return URLToken + } + return ErrorToken +} diff --git a/vendor/github.com/tdewolff/parse/css/lex_test.go b/vendor/github.com/tdewolff/parse/css/lex_test.go new file mode 100644 index 0000000..0bdc891 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/lex_test.go @@ -0,0 +1,143 @@ +package css // import "github.com/tdewolff/parse/css" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t *testing.T) { + var tokenTests = []struct { + css string + expected []TokenType + }{ + {" ", TTs{}}, + {"5.2 .4", TTs{NumberToken, NumberToken}}, + {"color: red;", TTs{IdentToken, ColonToken, IdentToken, SemicolonToken}}, + {"background: url(\"http://x\");", TTs{IdentToken, ColonToken, URLToken, SemicolonToken}}, + {"background: URL(x.png);", TTs{IdentToken, ColonToken, URLToken, SemicolonToken}}, + {"color: rgb(4, 0%, 5em);", TTs{IdentToken, ColonToken, FunctionToken, NumberToken, CommaToken, PercentageToken, CommaToken, DimensionToken, RightParenthesisToken, SemicolonToken}}, + {"body { \"string\" }", TTs{IdentToken, LeftBraceToken, StringToken, RightBraceToken}}, + {"body { \"str\\\"ing\" }", TTs{IdentToken, LeftBraceToken, StringToken, RightBraceToken}}, + {".class { }", TTs{DelimToken, IdentToken, LeftBraceToken, RightBraceToken}}, + {"#class { }", TTs{HashToken, LeftBraceToken, RightBraceToken}}, + {"#class\\#withhash { }", TTs{HashToken, LeftBraceToken, RightBraceToken}}, + {"@media print { }", TTs{AtKeywordToken, IdentToken, LeftBraceToken, RightBraceToken}}, + {"/*comment*/", TTs{CommentToken}}, + {"/*com* /ment*/", TTs{CommentToken}}, + {"~= |= ^= $= *=", TTs{IncludeMatchToken, DashMatchToken, 
PrefixMatchToken, SuffixMatchToken, SubstringMatchToken}}, + {"||", TTs{ColumnToken}}, + {"<!-- -->", TTs{CDOToken, CDCToken}}, + {"U+1234", TTs{UnicodeRangeToken}}, + {"5.2 .4 4e-22", TTs{NumberToken, NumberToken, NumberToken}}, + {"--custom-variable", TTs{CustomPropertyNameToken}}, + + // unexpected ending + {"ident", TTs{IdentToken}}, + {"123.", TTs{NumberToken, DelimToken}}, + {"\"string", TTs{StringToken}}, + {"123/*comment", TTs{NumberToken, CommentToken}}, + {"U+1-", TTs{IdentToken, NumberToken, DelimToken}}, + + // unicode + {"fooδbar", TTs{IdentToken}}, + {"foo\\æ\\†", TTs{IdentToken}}, + // {"foo\x00bar", TTs{IdentToken}}, + {"'foo\u554abar'", TTs{StringToken}}, + {"\\000026B", TTs{IdentToken}}, + {"\\26 B", TTs{IdentToken}}, + + // hacks + {`\-\mo\z\-b\i\nd\in\g:\url(//business\i\nfo.co.uk\/labs\/xbl\/xbl\.xml\#xss);`, TTs{IdentToken, ColonToken, URLToken, SemicolonToken}}, + {"width/**/:/**/ 40em;", TTs{IdentToken, CommentToken, ColonToken, CommentToken, DimensionToken, SemicolonToken}}, + {":root *> #quince", TTs{ColonToken, IdentToken, DelimToken, DelimToken, HashToken}}, + {"html[xmlns*=\"\"]:root", TTs{IdentToken, LeftBracketToken, IdentToken, SubstringMatchToken, StringToken, RightBracketToken, ColonToken, IdentToken}}, + {"body:nth-of-type(1)", TTs{IdentToken, ColonToken, FunctionToken, NumberToken, RightParenthesisToken}}, + {"color/*\\**/: blue\\9;", TTs{IdentToken, CommentToken, ColonToken, IdentToken, SemicolonToken}}, + {"color: blue !ie;", TTs{IdentToken, ColonToken, IdentToken, DelimToken, IdentToken, SemicolonToken}}, + + // escapes, null and replacement character + {"c\\\x00olor: white;", TTs{IdentToken, ColonToken, IdentToken, SemicolonToken}}, + {"null\\0", TTs{IdentToken}}, + {"eof\\", TTs{IdentToken}}, + {"\"a\x00b\"", TTs{StringToken}}, + {"a\\\x00b", TTs{IdentToken}}, + {"url(a\x00b)", TTs{BadURLToken}}, // null character cannot be unquoted + {"/*a\x00b*/", TTs{CommentToken}}, + + // coverage + {" \n\r\n\r\"\\\r\n\\\r\"", TTs{StringToken}}, + {"U+?????? U+ABCD?? U+ABC-DEF", TTs{UnicodeRangeToken, UnicodeRangeToken, UnicodeRangeToken}}, + {"U+? 
U+A?", TTs{IdentToken, DelimToken, DelimToken, IdentToken, DelimToken, IdentToken, DelimToken}}, + {"-5.23 -moz", TTs{NumberToken, IdentToken}}, + {"()", TTs{LeftParenthesisToken, RightParenthesisToken}}, + {"url( //url )", TTs{URLToken}}, + {"url( ", TTs{URLToken}}, + {"url( //url", TTs{URLToken}}, + {"url(\")a", TTs{URLToken}}, + {"url(a'\\\n)a", TTs{BadURLToken, IdentToken}}, + {"url(\"\n)a", TTs{BadURLToken, IdentToken}}, + {"url(a h)a", TTs{BadURLToken, IdentToken}}, + {"<!- | @4 ## /2", TTs{DelimToken, DelimToken, DelimToken, DelimToken, DelimToken, NumberToken, DelimToken, DelimToken, DelimToken, NumberToken}}, + {"\"s\\\n\"", TTs{StringToken}}, + {"\"a\\\"b\"", TTs{StringToken}}, + {"\"s\n", TTs{BadStringToken}}, + + // small + {"\"abcd", TTs{StringToken}}, + {"/*comment", TTs{CommentToken}}, + {"U+A-B", TTs{UnicodeRangeToken}}, + {"url((", TTs{BadURLToken}}, + {"id\u554a", TTs{IdentToken}}, + } + for _, tt := range tokenTests { + t.Run(tt.css, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.css)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == WhitespaceToken { + continue + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, token, tt.expected[i], "token types must match") + } + i++ + } + }) + } + + test.T(t, WhitespaceToken.String(), "Whitespace") + test.T(t, EmptyToken.String(), "Empty") + test.T(t, CustomPropertyValueToken.String(), "CustomPropertyValue") + test.T(t, TokenType(100).String(), "Invalid(100)") + test.T(t, NewLexer(bytes.NewBufferString("x")).consumeBracket(), ErrorToken, "consumeBracket on 'x' must return error") +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewLexer() { + l := NewLexer(bytes.NewBufferString("color: red;")) + out := "" + for { + tt, data := l.Next() + if tt == ErrorToken { + break + } else if tt == WhitespaceToken || tt == CommentToken { + continue + } + out += string(data) + } + fmt.Println(out) + // Output: color:red; +} diff --git a/vendor/github.com/tdewolff/parse/css/parse.go b/vendor/github.com/tdewolff/parse/css/parse.go new file mode 100644 index 0000000..e48cd66 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/parse.go @@ -0,0 +1,398 @@ +package css // import "github.com/tdewolff/parse/css" + +import ( + "bytes" + "io" + "strconv" + + "github.com/tdewolff/parse" +) + +var wsBytes = []byte(" ") +var endBytes = []byte("}") +var emptyBytes = []byte("") + +// GrammarType determines the type of grammar. +type GrammarType uint32 + +// GrammarType values. +const ( + ErrorGrammar GrammarType = iota // extra token when errors occur + CommentGrammar + AtRuleGrammar + BeginAtRuleGrammar + EndAtRuleGrammar + QualifiedRuleGrammar + BeginRulesetGrammar + EndRulesetGrammar + DeclarationGrammar + TokenGrammar + CustomPropertyGrammar +) + +// String returns the string representation of a GrammarType. 
+func (tt GrammarType) String() string { + switch tt { + case ErrorGrammar: + return "Error" + case CommentGrammar: + return "Comment" + case AtRuleGrammar: + return "AtRule" + case BeginAtRuleGrammar: + return "BeginAtRule" + case EndAtRuleGrammar: + return "EndAtRule" + case QualifiedRuleGrammar: + return "QualifiedRule" + case BeginRulesetGrammar: + return "BeginRuleset" + case EndRulesetGrammar: + return "EndRuleset" + case DeclarationGrammar: + return "Declaration" + case TokenGrammar: + return "Token" + case CustomPropertyGrammar: + return "CustomProperty" + } + return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// State is the state function the parser currently is in. +type State func(*Parser) GrammarType + +// Token is a single TokenType and its associated data. +type Token struct { + TokenType + Data []byte +} + +// Parser is the state for the parser. +type Parser struct { + l *Lexer + state []State + err error + + buf []Token + level int + + tt TokenType + data []byte + prevWS bool + prevEnd bool +} + +// NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute. +func NewParser(r io.Reader, isInline bool) *Parser { + l := NewLexer(r) + p := &Parser{ + l: l, + state: make([]State, 0, 4), + } + + if isInline { + p.state = append(p.state, (*Parser).parseDeclarationList) + } else { + p.state = append(p.state, (*Parser).parseStylesheet) + } + return p +} + +// Err returns the error encountered during parsing; this is often io.EOF, but other errors can be returned as well. +func (p *Parser) Err() error { + if p.err != nil { + return p.err + } + return p.l.Err() +} + +// Restore restores the NULL byte at the end of the buffer. +func (p *Parser) Restore() { + p.l.Restore() +} + +// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message. +func (p *Parser) Next() (GrammarType, TokenType, []byte) { + p.err = nil + + if p.prevEnd { + p.tt, p.data = RightBraceToken, endBytes + p.prevEnd = false + } else { + p.tt, p.data = p.popToken(true) + } + gt := p.state[len(p.state)-1](p) + return gt, p.tt, p.data +} + +// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and DeclarationGrammar will return the at-rule components, ruleset selector and declaration values respectively.
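+// For the inline input "color: red;", for instance, Next returns DeclarationGrammar with data "color", and Values then holds the single ident token "red" (cf. ExampleNewParser in parse_test.go).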
+func (p *Parser) Values() []Token { + return p.buf +} + +func (p *Parser) popToken(allowComment bool) (TokenType, []byte) { + p.prevWS = false + tt, data := p.l.Next() + for tt == WhitespaceToken || tt == CommentToken { + if tt == WhitespaceToken { + p.prevWS = true + } else if allowComment && len(p.state) == 1 { + break + } + tt, data = p.l.Next() + } + return tt, data +} + +func (p *Parser) initBuf() { + p.buf = p.buf[:0] +} + +func (p *Parser) pushBuf(tt TokenType, data []byte) { + p.buf = append(p.buf, Token{tt, data}) +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseStylesheet() GrammarType { + if p.tt == CDOToken || p.tt == CDCToken { + return TokenGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else if p.tt == CommentToken { + return CommentGrammar + } else if p.tt == ErrorToken { + return ErrorGrammar + } + return p.parseQualifiedRule() +} + +func (p *Parser) parseDeclarationList() GrammarType { + if p.tt == CommentToken { + p.tt, p.data = p.popToken(false) + } + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + if p.tt == ErrorToken { + return ErrorGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else if p.tt == IdentToken { + return p.parseDeclaration() + } else if p.tt == CustomPropertyNameToken { + return p.parseCustomProperty() + } + + // parse error + p.initBuf() + p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r) + for { + tt, data := p.popToken(false) + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + return ErrorGrammar + } + p.pushBuf(tt, data) + } +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseAtRule() GrammarType { + p.initBuf() + parse.ToLower(p.data) + atRuleName := p.data + if len(atRuleName) > 0 && atRuleName[1] == '-' { + if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 { + atRuleName = atRuleName[i+2:] // skip vendor specific prefix + } + } + atRule := ToHash(atRuleName[1:]) + + first := true + skipWS := false + for { + tt, data := p.popToken(false) + if tt == LeftBraceToken && p.level == 0 { + if atRule == Font_Face || atRule == Page { + p.state = append(p.state, (*Parser).parseAtRuleDeclarationList) + } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports { + p.state = append(p.state, (*Parser).parseAtRuleRuleList) + } else { + p.state = append(p.state, (*Parser).parseAtRuleUnknown) + } + return BeginAtRuleGrammar + } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + return AtRuleGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + p.level-- + } + if first { + if tt == LeftParenthesisToken || tt == LeftBracketToken { + p.prevWS = false + } + first = false + } + if len(data) == 1 && (data[0] == ',' || data[0] == ':') { + skipWS = true + } else if p.prevWS && !skipWS && tt != RightParenthesisToken { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + if tt == LeftParenthesisToken { + skipWS = true + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseAtRuleRuleList() GrammarType { + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return 
EndAtRuleGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else { + return p.parseQualifiedRule() + } +} + +func (p *Parser) parseAtRuleDeclarationList() GrammarType { + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndAtRuleGrammar + } + return p.parseDeclarationList() +} + +func (p *Parser) parseAtRuleUnknown() GrammarType { + if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndAtRuleGrammar + } + if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken { + p.level++ + } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken { + p.level-- + } + return TokenGrammar +} + +func (p *Parser) parseQualifiedRule() GrammarType { + p.initBuf() + first := true + inAttrSel := false + skipWS := true + var tt TokenType + var data []byte + for { + if first { + tt, data = p.tt, p.data + p.tt = WhitespaceToken + p.data = emptyBytes + first = false + } else { + tt, data = p.popToken(false) + } + if tt == LeftBraceToken && p.level == 0 { + p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList) + return BeginRulesetGrammar + } else if tt == ErrorToken { + p.err = parse.NewErrorLexer("unexpected ending in qualified rule, expected left brace token", p.l.r) + return ErrorGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + p.level-- + } + if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') { + if data[0] == ',' { + return QualifiedRuleGrammar + } + skipWS = true + } else if p.prevWS && !skipWS && !inAttrSel { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + if tt == LeftBracketToken { + inAttrSel = true + } else if tt == RightBracketToken { + inAttrSel = false + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType { + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndRulesetGrammar + } + return p.parseDeclarationList() +} + +func (p *Parser) parseDeclaration() GrammarType { + p.initBuf() + parse.ToLower(p.data) + if tt, _ := p.popToken(false); tt != ColonToken { + p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r) + return ErrorGrammar + } + skipWS := true + for { + tt, data := p.popToken(false) + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + return DeclarationGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + p.level-- + } + if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' 
|| data[0] == '=') { + skipWS = true + } else if p.prevWS && !skipWS { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseCustomProperty() GrammarType { + p.initBuf() + if tt, _ := p.popToken(false); tt != ColonToken { + p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r) + return ErrorGrammar + } + val := []byte{} + for { + tt, data := p.l.Next() + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + p.pushBuf(CustomPropertyValueToken, val) + return CustomPropertyGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + p.level-- + } + val = append(val, data...) + } +} diff --git a/vendor/github.com/tdewolff/parse/css/parse_test.go b/vendor/github.com/tdewolff/parse/css/parse_test.go new file mode 100644 index 0000000..33f6f5f --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/parse_test.go @@ -0,0 +1,248 @@ +package css // import "github.com/tdewolff/parse/css" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +//////////////////////////////////////////////////////////////// + +func TestParse(t *testing.T) { + var parseTests = []struct { + inline bool + css string + expected string + }{ + {true, " x : y ; ", "x:y;"}, + {true, "color: red;", "color:red;"}, + {true, "color : red;", "color:red;"}, + {true, "color: red; border: 0;", "color:red;border:0;"}, + {true, "color: red !important;", "color:red!important;"}, + {true, "color: red ! important;", "color:red!important;"}, + {true, "white-space: -moz-pre-wrap;", "white-space:-moz-pre-wrap;"}, + {true, "display: -moz-inline-stack;", "display:-moz-inline-stack;"}, + {true, "x: 10px / 1em;", "x:10px/1em;"}, + {true, "x: 1em/1.5em \"Times New Roman\", Times, serif;", "x:1em/1.5em \"Times New Roman\",Times,serif;"}, + {true, "x: hsla(100,50%, 75%, 0.5);", "x:hsla(100,50%,75%,0.5);"}, + {true, "x: hsl(100,50%, 75%);", "x:hsl(100,50%,75%);"}, + {true, "x: rgba(255, 238 , 221, 0.3);", "x:rgba(255,238,221,0.3);"}, + {true, "x: 50vmax;", "x:50vmax;"}, + {true, "color: linear-gradient(to right, black, white);", "color:linear-gradient(to right,black,white);"}, + {true, "color: calc(100%/2 - 1em);", "color:calc(100%/2 - 1em);"}, + {true, "color: calc(100%/2--1em);", "color:calc(100%/2--1em);"}, + {false, "<!-- @charset; -->", "<!--@charset;-->"}, + {false, "@media print, screen { }", "@media print,screen{}"}, + {false, "@media { @viewport ; }", "@media{@viewport;}"}, + {false, "@keyframes 'diagonal-slide' { from { left: 0; top: 0; } to { left: 100px; top: 100px; } }", "@keyframes 'diagonal-slide'{from{left:0;top:0;}to{left:100px;top:100px;}}"}, + {false, "@keyframes movingbox{0%{left:90%;}50%{left:10%;}100%{left:90%;}}", "@keyframes movingbox{0%{left:90%;}50%{left:10%;}100%{left:90%;}}"}, + {false, ".foo { color: #fff;}", ".foo{color:#fff;}"}, + {false, ".foo { ; _color: #fff;}", ".foo{_color:#fff;}"}, + {false, "a { color: red; border: 0; }", "a{color:red;border:0;}"}, + {false, "a { color: red; border: 0; } b { padding: 0; }", "a{color:red;border:0;}b{padding:0;}"}, + {false, "/* comment */", "/* comment */"}, + + // extraordinary + {true, "color: red;;", "color:red;"}, + {true, "color:#c0c0c0", "color:#c0c0c0;"}, + {true, "background:URL(x.png);", 
"background:URL(x.png);"}, + {true, "filter: progid : DXImageTransform.Microsoft.BasicImage(rotation=1);", "filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=1);"}, + {true, "/*a*/\n/*c*/\nkey: value;", "key:value;"}, + {true, "@-moz-charset;", "@-moz-charset;"}, + {true, "--custom-variable: (0;) ;", "--custom-variable: (0;) ;"}, + {false, "@import;@import;", "@import;@import;"}, + {false, ".a .b#c, .d<.e { x:y; }", ".a .b#c,.d<.e{x:y;}"}, + {false, ".a[b~=c]d { x:y; }", ".a[b~=c]d{x:y;}"}, + // {false, "{x:y;}", "{x:y;}"}, + {false, "a{}", "a{}"}, + {false, "a,.b/*comment*/ {x:y;}", "a,.b{x:y;}"}, + {false, "a,.b/*comment*/.c {x:y;}", "a,.b.c{x:y;}"}, + {false, "a{x:; z:q;}", "a{x:;z:q;}"}, + {false, "@font-face { x:y; }", "@font-face{x:y;}"}, + {false, "a:not([controls]){x:y;}", "a:not([controls]){x:y;}"}, + {false, "@document regexp('https:.*') { p { color: red; } }", "@document regexp('https:.*'){p{color:red;}}"}, + {false, "@media all and ( max-width:400px ) { }", "@media all and (max-width:400px){}"}, + {false, "@media (max-width:400px) { }", "@media(max-width:400px){}"}, + {false, "@media (max-width:400px)", "@media(max-width:400px);"}, + {false, "@font-face { ; font:x; }", "@font-face{font:x;}"}, + {false, "@-moz-font-face { ; font:x; }", "@-moz-font-face{font:x;}"}, + {false, "@unknown abc { {} lala }", "@unknown abc{{}lala}"}, + {false, "a[x={}]{x:y;}", "a[x={}]{x:y;}"}, + {false, "a[x=,]{x:y;}", "a[x=,]{x:y;}"}, + {false, "a[x=+]{x:y;}", "a[x=+]{x:y;}"}, + {false, ".cla .ss > #id { x:y; }", ".cla .ss>#id{x:y;}"}, + {false, ".cla /*a*/ /*b*/ .ss{}", ".cla .ss{}"}, + {false, "a{x:f(a(),b);}", "a{x:f(a(),b);}"}, + {false, "a{x:y!z;}", "a{x:y!z;}"}, + {false, "[class*=\"column\"]+[class*=\"column\"]:last-child{a:b;}", "[class*=\"column\"]+[class*=\"column\"]:last-child{a:b;}"}, + {false, "@media { @viewport }", "@media{@viewport;}"}, + {false, "table { @unknown }", "table{@unknown;}"}, + + // early endings + {false, "selector{", "selector{"}, + {false, "@media{selector{", "@media{selector{"}, + + // bad grammar + {true, "~color:red", "~color:red;"}, + {false, ".foo { *color: #fff;}", ".foo{*color:#fff;}"}, + {true, "*color: red; font-size: 12pt;", "*color:red;font-size:12pt;"}, + {true, "_color: red; font-size: 12pt;", "_color:red;font-size:12pt;"}, + + // issues + {false, "@media print {.class{width:5px;}}", "@media print{.class{width:5px;}}"}, // #6 + {false, ".class{width:calc((50% + 2em)/2 + 14px);}", ".class{width:calc((50% + 2em)/2 + 14px);}"}, // #7 + {false, ".class [c=y]{}", ".class [c=y]{}"}, // tdewolff/minify#16 + {false, "table{font-family:Verdana}", "table{font-family:Verdana;}"}, // tdewolff/minify#22 + + // go-fuzz + {false, "@-webkit-", "@-webkit-;"}, + } + for _, tt := range parseTests { + t.Run(tt.css, func(t *testing.T) { + output := "" + p := NewParser(bytes.NewBufferString(tt.css), tt.inline) + for { + grammar, _, data := p.Next() + data = parse.Copy(data) + if grammar == ErrorGrammar { + if err := p.Err(); err != io.EOF { + for _, val := range p.Values() { + data = append(data, val.Data...) + } + if perr, ok := err.(*parse.Error); ok && perr.Message == "unexpected token in declaration" { + data = append(data, ";"...) 
+ } else { + test.T(t, err, io.EOF) + break + } + } else if grammar == AtRuleGrammar || grammar == BeginAtRuleGrammar || grammar == QualifiedRuleGrammar || grammar == BeginRulesetGrammar || grammar == DeclarationGrammar || grammar == CustomPropertyGrammar { + if grammar == DeclarationGrammar || grammar == CustomPropertyGrammar { + data = append(data, ":"...) + } + for _, val := range p.Values() { + data = append(data, val.Data...) + } + if grammar == BeginAtRuleGrammar || grammar == BeginRulesetGrammar { + data = append(data, "{"...) + } else if grammar == AtRuleGrammar || grammar == DeclarationGrammar || grammar == CustomPropertyGrammar { + data = append(data, ";"...) + } else if grammar == QualifiedRuleGrammar { + data = append(data, ","...) + } + } + output += string(data) + } + test.String(t, output, tt.expected) + }) + } + + test.T(t, ErrorGrammar.String(), "Error") + test.T(t, AtRuleGrammar.String(), "AtRule") + test.T(t, BeginAtRuleGrammar.String(), "BeginAtRule") + test.T(t, EndAtRuleGrammar.String(), "EndAtRule") + test.T(t, BeginRulesetGrammar.String(), "BeginRuleset") + test.T(t, EndRulesetGrammar.String(), "EndRuleset") + test.T(t, DeclarationGrammar.String(), "Declaration") + test.T(t, TokenGrammar.String(), "Token") + test.T(t, CommentGrammar.String(), "Comment") + test.T(t, CustomPropertyGrammar.String(), "CustomProperty") + test.T(t, GrammarType(100).String(), "Invalid(100)") +} + +func TestParseError(t *testing.T) { + var parseErrorTests = []struct { + inline bool + css string + col int + }{ + {false, "selector", 9}, + {true, "color 0", 8}, + {true, "--color 0", 10}, + {true, "--custom-variable:0", 0}, + } + for _, tt := range parseErrorTests { + t.Run(tt.css, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.css), tt.inline) + for { + grammar, _, _ := p.Next() + if grammar == ErrorGrammar { + if tt.col == 0 { + test.T(t, p.Err(), io.EOF) + } else if perr, ok := p.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", p.Err()) + } + break + } + } + }) + } +} + +func TestReader(t *testing.T) { + input := "x:a;" + p := NewParser(test.NewPlainReader(bytes.NewBufferString(input)), true) + for { + grammar, _, _ := p.Next() + if grammar == ErrorGrammar { + break + } + } +} + +//////////////////////////////////////////////////////////////// + +type Obj struct{} + +func (*Obj) F() {} + +var f1 func(*Obj) + +func BenchmarkFuncPtr(b *testing.B) { + for i := 0; i < b.N; i++ { + f1 = (*Obj).F + } +} + +var f2 func() + +func BenchmarkMemFuncPtr(b *testing.B) { + obj := &Obj{} + for i := 0; i < b.N; i++ { + f2 = obj.F + } +} + +func ExampleNewParser() { + p := NewParser(bytes.NewBufferString("color: red;"), true) // true because this is the content of an inline style attribute + out := "" + for { + gt, _, data := p.Next() + if gt == ErrorGrammar { + break + } else if gt == AtRuleGrammar || gt == BeginAtRuleGrammar || gt == BeginRulesetGrammar || gt == DeclarationGrammar { + out += string(data) + if gt == DeclarationGrammar { + out += ":" + } + for _, val := range p.Values() { + out += string(val.Data) + } + if gt == BeginAtRuleGrammar || gt == BeginRulesetGrammar { + out += "{" + } else if gt == AtRuleGrammar || gt == DeclarationGrammar { + out += ";" + } + } else { + out += string(data) + } + } + fmt.Println(out) + // Output: color:red; +} diff --git a/vendor/github.com/tdewolff/parse/css/util.go b/vendor/github.com/tdewolff/parse/css/util.go new file mode 100644 index 0000000..676dee8 --- /dev/null +++ 
b/vendor/github.com/tdewolff/parse/css/util.go @@ -0,0 +1,47 @@ +package css // import "github.com/tdewolff/parse/css" + +import "github.com/tdewolff/parse/buffer" + +// IsIdent returns true if the bytes are a valid identifier. +func IsIdent(b []byte) bool { + l := NewLexer(buffer.NewReader(b)) + l.consumeIdentToken() + l.r.Restore() + return l.r.Pos() == len(b) +} + +// IsURLUnquoted returns true if the bytes are a valid unquoted URL. +func IsURLUnquoted(b []byte) bool { + l := NewLexer(buffer.NewReader(b)) + l.consumeUnquotedURL() + l.r.Restore() + return l.r.Pos() == len(b) +} + +// HSL2RGB converts HSL to RGB with all of range [0,1] +// from http://www.w3.org/TR/css3-color/#hsl-color +func HSL2RGB(h, s, l float64) (float64, float64, float64) { + m2 := l * (s + 1) + if l > 0.5 { + m2 = l + s - l*s + } + m1 := l*2 - m2 + return hue2rgb(m1, m2, h+1.0/3.0), hue2rgb(m1, m2, h), hue2rgb(m1, m2, h-1.0/3.0) +} + +func hue2rgb(m1, m2, h float64) float64 { + if h < 0.0 { + h += 1.0 + } + if h > 1.0 { + h -= 1.0 + } + if h*6.0 < 1.0 { + return m1 + (m2-m1)*h*6.0 + } else if h*2.0 < 1.0 { + return m2 + } else if h*3.0 < 2.0 { + return m1 + (m2-m1)*(2.0/3.0-h)*6.0 + } + return m1 +} diff --git a/vendor/github.com/tdewolff/parse/css/util_test.go b/vendor/github.com/tdewolff/parse/css/util_test.go new file mode 100644 index 0000000..9eb5aa9 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/css/util_test.go @@ -0,0 +1,34 @@ +package css // import "github.com/tdewolff/parse/css" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestIsIdent(t *testing.T) { + test.That(t, IsIdent([]byte("color"))) + test.That(t, !IsIdent([]byte("4.5"))) +} + +func TestIsURLUnquoted(t *testing.T) { + test.That(t, IsURLUnquoted([]byte("http://x"))) + test.That(t, !IsURLUnquoted([]byte(")"))) +} + +func TestHsl2Rgb(t *testing.T) { + r, g, b := HSL2RGB(0.0, 1.0, 0.5) + test.T(t, r, 1.0) + test.T(t, g, 0.0) + test.T(t, b, 0.0) + + r, g, b = HSL2RGB(1.0, 1.0, 0.5) + test.T(t, r, 1.0) + test.T(t, g, 0.0) + test.T(t, b, 0.0) + + r, g, b = HSL2RGB(0.66, 0.0, 1.0) + test.T(t, r, 1.0) + test.T(t, g, 1.0) + test.T(t, b, 1.0) +} diff --git a/vendor/github.com/tdewolff/parse/error.go b/vendor/github.com/tdewolff/parse/error.go new file mode 100644 index 0000000..1e85f9b --- /dev/null +++ b/vendor/github.com/tdewolff/parse/error.go @@ -0,0 +1,35 @@ +package parse + +import ( + "fmt" + "io" + + "github.com/tdewolff/parse/buffer" +) + +type Error struct { + Message string + Line int + Col int + Context string +} + +func NewError(msg string, r io.Reader, offset int) *Error { + line, col, context, _ := Position(r, offset) + return &Error{ + msg, + line, + col, + context, + } +} + +func NewErrorLexer(msg string, l *buffer.Lexer) *Error { + r := buffer.NewReader(l.Bytes()) + offset := l.Offset() + return NewError(msg, r, offset) +} + +func (e *Error) Error() string { + return fmt.Sprintf("parse error:%d:%d: %s\n%s", e.Line, e.Col, e.Message, e.Context) +} diff --git a/vendor/github.com/tdewolff/parse/html/README.md b/vendor/github.com/tdewolff/parse/html/README.md new file mode 100644 index 0000000..edca629 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/README.md @@ -0,0 +1,98 @@ +# HTML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/html?status.svg)](http://godoc.org/github.com/tdewolff/parse/html) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/html)](http://gocover.io/github.com/tdewolff/parse/html) + +This package is an HTML5 lexer written in [Go][1]. 
It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until EOF.
+
+## Installation
+Run the following command
+
+	go get github.com/tdewolff/parse/html
+
+or add the following import and run the project with `go get`
+
+	import "github.com/tdewolff/parse/html"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := html.NewLexer(r)
+```
+
+To tokenize until EOF or an error occurs, use:
+``` go
+for {
+	tt, data := l.Next()
+	switch tt {
+	case html.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	case html.StartTagToken:
+		// ...
+		for {
+			ttAttr, dataAttr := l.Next()
+			if ttAttr != html.AttributeToken {
+				break
+			}
+			// ...
+		}
+		// ...
+	}
+}
+```
+
+All tokens:
+``` go
+ErrorToken TokenType = iota // extra token when errors occur
+CommentToken
+DoctypeToken
+StartTagToken
+StartTagCloseToken
+StartTagVoidToken
+EndTagToken
+AttributeToken
+TextToken
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/html"
+)
+
+// Tokenize HTML from stdin.
+func main() {
+	l := html.NewLexer(os.Stdin)
+	for {
+		tt, data := l.Next()
+		switch tt {
+		case html.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error:", l.Err())
+			}
+			return
+		case html.StartTagToken:
+			fmt.Println("Tag", string(data))
+			for {
+				ttAttr, dataAttr := l.Next()
+				if ttAttr != html.AttributeToken {
+					break
+				}
+
+				key := dataAttr
+				val := l.AttrVal()
+				fmt.Println("Attribute", string(key), "=", string(val))
+			}
+			// ...
+		}
+	}
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/html/hash.go b/vendor/github.com/tdewolff/parse/html/hash.go
new file mode 100644
index 0000000..382e5c5
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/html/hash.go
@@ -0,0 +1,831 @@
+package html
+
+// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
+
+// uses github.com/tdewolff/hasher
+//go:generate hasher -type=Hash -file=hash.go
+
+// Hash defines perfect hashes for a predefined list of strings
+type Hash uint32
+
+// Unique hash definitions to be used instead of strings
+const (
+	A               Hash = 0x1     // a
+	Abbr            Hash = 0x4     // abbr
+	Accept          Hash = 0x3206  // accept
+	Accept_Charset  Hash = 0x320e  // accept-charset
+	Accesskey       Hash = 0x4409  // accesskey
+	Acronym         Hash = 0xbb07  // acronym
+	Action          Hash = 0x2ba06 // action
+	Address         Hash = 0x67e07 // address
+	Align           Hash = 0x1605  // align
+	Alink           Hash = 0xd205  // alink
+	Allowfullscreen Hash = 0x23d0f // allowfullscreen
+	Alt             Hash = 0xee03  // alt
+	Annotation      Hash = 0x2070a // annotation
+	AnnotationXml   Hash = 0x2070d // annotationXml
+	Applet          Hash = 0x14506 // applet
+	Area            Hash = 0x38d04 // area
+	Article         Hash = 0x40e07 // article
+	Aside           Hash = 0x8305  // aside
+	Async           Hash = 0xfa05  // async
+	Audio           Hash = 0x11605 // audio
+	Autocomplete    Hash = 0x12e0c // autocomplete
+	Autofocus       Hash = 0x13a09 // autofocus
+	Autoplay        Hash = 0x14f08 // autoplay
+	Axis            Hash = 0x15704 // axis
+	B               Hash = 0x101   // b
+	Background      Hash = 0x1e0a  // background
+	Base            Hash = 0x45404 // base
+	Basefont        Hash = 0x45408 // basefont
+	Bdi             Hash = 0xcb03  // bdi
+	Bdo             Hash = 0x18403 // bdo
+	Bgcolor         Hash = 0x19707 // bgcolor
+	Bgsound         Hash = 0x19e07 // bgsound
+	Big             Hash = 0x1a603 // big
+	Blink           Hash = 0x1a905 // blink
+ Blockquote Hash = 0x1ae0a // blockquote + Body Hash = 0x4004 // body + Border Hash = 0x33806 // border + Br Hash = 0x202 // br + Button Hash = 0x1b806 // button + Canvas Hash = 0x7f06 // canvas + Caption Hash = 0x27f07 // caption + Center Hash = 0x62a06 // center + Challenge Hash = 0x1e509 // challenge + Charset Hash = 0x3907 // charset + Checked Hash = 0x3b407 // checked + Cite Hash = 0xfe04 // cite + Class Hash = 0x1c305 // class + Classid Hash = 0x1c307 // classid + Clear Hash = 0x41205 // clear + Code Hash = 0x1d604 // code + Codebase Hash = 0x45008 // codebase + Codetype Hash = 0x1d608 // codetype + Col Hash = 0x19903 // col + Colgroup Hash = 0x1ee08 // colgroup + Color Hash = 0x19905 // color + Cols Hash = 0x20204 // cols + Colspan Hash = 0x20207 // colspan + Command Hash = 0x21407 // command + Compact Hash = 0x21b07 // compact + Content Hash = 0x4a907 // content + Contenteditable Hash = 0x4a90f // contenteditable + Contextmenu Hash = 0x3bd0b // contextmenu + Controls Hash = 0x22a08 // controls + Coords Hash = 0x23606 // coords + Crossorigin Hash = 0x25b0b // crossorigin + Data Hash = 0x4c004 // data + Datalist Hash = 0x4c008 // datalist + Datetime Hash = 0x2ea08 // datetime + Dd Hash = 0x31602 // dd + Declare Hash = 0x8607 // declare + Default Hash = 0x5407 // default + DefaultChecked Hash = 0x5040e // defaultChecked + DefaultMuted Hash = 0x5650c // defaultMuted + DefaultSelected Hash = 0x540f // defaultSelected + Defer Hash = 0x6205 // defer + Del Hash = 0x7203 // del + Desc Hash = 0x7c04 // desc + Details Hash = 0x9207 // details + Dfn Hash = 0xab03 // dfn + Dialog Hash = 0xcc06 // dialog + Dir Hash = 0xd903 // dir + Dirname Hash = 0xd907 // dirname + Disabled Hash = 0x10408 // disabled + Div Hash = 0x10b03 // div + Dl Hash = 0x1a402 // dl + Download Hash = 0x48608 // download + Draggable Hash = 0x1c909 // draggable + Dropzone Hash = 0x41908 // dropzone + Dt Hash = 0x60602 // dt + Em Hash = 0x6e02 // em + Embed Hash = 0x6e05 // embed + Enabled Hash = 0x4e07 // enabled + Enctype Hash = 0x2cf07 // enctype + Face Hash = 0x62804 // face + Fieldset Hash = 0x26c08 // fieldset + Figcaption Hash = 0x27c0a // figcaption + Figure Hash = 0x29006 // figure + Font Hash = 0x45804 // font + Footer Hash = 0xf106 // footer + For Hash = 0x29c03 // for + ForeignObject Hash = 0x29c0d // foreignObject + Foreignobject Hash = 0x2a90d // foreignobject + Form Hash = 0x2b604 // form + Formaction Hash = 0x2b60a // formaction + Formenctype Hash = 0x2cb0b // formenctype + Formmethod Hash = 0x2d60a // formmethod + Formnovalidate Hash = 0x2e00e // formnovalidate + Formtarget Hash = 0x2f50a // formtarget + Frame Hash = 0xa305 // frame + Frameborder Hash = 0x3330b // frameborder + Frameset Hash = 0xa308 // frameset + H1 Hash = 0x19502 // h1 + H2 Hash = 0x32402 // h2 + H3 Hash = 0x34902 // h3 + H4 Hash = 0x38602 // h4 + H5 Hash = 0x60802 // h5 + H6 Hash = 0x2ff02 // h6 + Head Hash = 0x37204 // head + Header Hash = 0x37206 // header + Headers Hash = 0x37207 // headers + Height Hash = 0x30106 // height + Hgroup Hash = 0x30906 // hgroup + Hidden Hash = 0x31406 // hidden + High Hash = 0x32104 // high + Hr Hash = 0xaf02 // hr + Href Hash = 0xaf04 // href + Hreflang Hash = 0xaf08 // hreflang + Html Hash = 0x30504 // html + Http_Equiv Hash = 0x3260a // http-equiv + I Hash = 0x601 // i + Icon Hash = 0x4a804 // icon + Id Hash = 0x8502 // id + Iframe Hash = 0x33206 // iframe + Image Hash = 0x33e05 // image + Img Hash = 0x34303 // img + Inert Hash = 0x55005 // inert + Input Hash = 0x47305 // input + Ins Hash = 0x26403 // 
ins + Isindex Hash = 0x15907 // isindex + Ismap Hash = 0x34b05 // ismap + Itemid Hash = 0xff06 // itemid + Itemprop Hash = 0x58808 // itemprop + Itemref Hash = 0x62207 // itemref + Itemscope Hash = 0x35609 // itemscope + Itemtype Hash = 0x36008 // itemtype + Kbd Hash = 0xca03 // kbd + Keygen Hash = 0x4a06 // keygen + Keytype Hash = 0x68807 // keytype + Kind Hash = 0xd604 // kind + Label Hash = 0x7405 // label + Lang Hash = 0xb304 // lang + Language Hash = 0xb308 // language + Legend Hash = 0x1d006 // legend + Li Hash = 0x1702 // li + Link Hash = 0xd304 // link + List Hash = 0x4c404 // list + Listing Hash = 0x4c407 // listing + Longdesc Hash = 0x7808 // longdesc + Loop Hash = 0x12104 // loop + Low Hash = 0x23f03 // low + Main Hash = 0x1004 // main + Malignmark Hash = 0xc10a // malignmark + Manifest Hash = 0x65e08 // manifest + Map Hash = 0x14403 // map + Mark Hash = 0xc704 // mark + Marquee Hash = 0x36807 // marquee + Math Hash = 0x36f04 // math + Max Hash = 0x37e03 // max + Maxlength Hash = 0x37e09 // maxlength + Media Hash = 0xde05 // media + Mediagroup Hash = 0xde0a // mediagroup + Menu Hash = 0x3c404 // menu + Meta Hash = 0x4d304 // meta + Meter Hash = 0x2f005 // meter + Method Hash = 0x2da06 // method + Mglyph Hash = 0x34406 // mglyph + Mi Hash = 0x2c02 // mi + Min Hash = 0x2c03 // min + Mn Hash = 0x2e302 // mn + Mo Hash = 0x4f702 // mo + Ms Hash = 0x35902 // ms + Mtext Hash = 0x38805 // mtext + Multiple Hash = 0x39608 // multiple + Muted Hash = 0x39e05 // muted + Name Hash = 0xdc04 // name + Nav Hash = 0x1303 // nav + Nobr Hash = 0x1a04 // nobr + Noembed Hash = 0x6c07 // noembed + Noframes Hash = 0xa108 // noframes + Nohref Hash = 0xad06 // nohref + Noresize Hash = 0x24b08 // noresize + Noscript Hash = 0x31908 // noscript + Noshade Hash = 0x4ff07 // noshade + Novalidate Hash = 0x2e40a // novalidate + Nowrap Hash = 0x59106 // nowrap + Object Hash = 0x2b006 // object + Ol Hash = 0x17102 // ol + Onabort Hash = 0x1bc07 // onabort + Onafterprint Hash = 0x2840c // onafterprint + Onbeforeprint Hash = 0x2be0d // onbeforeprint + Onbeforeunload Hash = 0x6720e // onbeforeunload + Onblur Hash = 0x17e06 // onblur + Oncancel Hash = 0x11a08 // oncancel + Oncanplay Hash = 0x18609 // oncanplay + Oncanplaythrough Hash = 0x18610 // oncanplaythrough + Onchange Hash = 0x42f08 // onchange + Onclick Hash = 0x6b607 // onclick + Onclose Hash = 0x3a307 // onclose + Oncontextmenu Hash = 0x3bb0d // oncontextmenu + Oncuechange Hash = 0x3c80b // oncuechange + Ondblclick Hash = 0x3d30a // ondblclick + Ondrag Hash = 0x3dd06 // ondrag + Ondragend Hash = 0x3dd09 // ondragend + Ondragenter Hash = 0x3e60b // ondragenter + Ondragleave Hash = 0x3f10b // ondragleave + Ondragover Hash = 0x3fc0a // ondragover + Ondragstart Hash = 0x4060b // ondragstart + Ondrop Hash = 0x41706 // ondrop + Ondurationchange Hash = 0x42710 // ondurationchange + Onemptied Hash = 0x41e09 // onemptied + Onended Hash = 0x43707 // onended + Onerror Hash = 0x43e07 // onerror + Onfocus Hash = 0x44507 // onfocus + Onhashchange Hash = 0x4650c // onhashchange + Oninput Hash = 0x47107 // oninput + Oninvalid Hash = 0x47809 // oninvalid + Onkeydown Hash = 0x48109 // onkeydown + Onkeypress Hash = 0x48e0a // onkeypress + Onkeyup Hash = 0x49e07 // onkeyup + Onload Hash = 0x4b806 // onload + Onloadeddata Hash = 0x4b80c // onloadeddata + Onloadedmetadata Hash = 0x4cb10 // onloadedmetadata + Onloadstart Hash = 0x4e10b // onloadstart + Onmessage Hash = 0x4ec09 // onmessage + Onmousedown Hash = 0x4f50b // onmousedown + Onmousemove Hash = 0x5120b // onmousemove + 
Onmouseout Hash = 0x51d0a // onmouseout + Onmouseover Hash = 0x52a0b // onmouseover + Onmouseup Hash = 0x53509 // onmouseup + Onmousewheel Hash = 0x53e0c // onmousewheel + Onoffline Hash = 0x54a09 // onoffline + Ononline Hash = 0x55508 // ononline + Onpagehide Hash = 0x55d0a // onpagehide + Onpageshow Hash = 0x5710a // onpageshow + Onpause Hash = 0x57d07 // onpause + Onplay Hash = 0x59c06 // onplay + Onplaying Hash = 0x59c09 // onplaying + Onpopstate Hash = 0x5a50a // onpopstate + Onprogress Hash = 0x5af0a // onprogress + Onratechange Hash = 0x5be0c // onratechange + Onreset Hash = 0x5ca07 // onreset + Onresize Hash = 0x5d108 // onresize + Onscroll Hash = 0x5d908 // onscroll + Onseeked Hash = 0x5e408 // onseeked + Onseeking Hash = 0x5ec09 // onseeking + Onselect Hash = 0x5f508 // onselect + Onshow Hash = 0x5ff06 // onshow + Onstalled Hash = 0x60a09 // onstalled + Onstorage Hash = 0x61309 // onstorage + Onsubmit Hash = 0x61c08 // onsubmit + Onsuspend Hash = 0x63009 // onsuspend + Ontimeupdate Hash = 0x4590c // ontimeupdate + Onunload Hash = 0x63908 // onunload + Onvolumechange Hash = 0x6410e // onvolumechange + Onwaiting Hash = 0x64f09 // onwaiting + Open Hash = 0x58e04 // open + Optgroup Hash = 0x12308 // optgroup + Optimum Hash = 0x65807 // optimum + Option Hash = 0x66e06 // option + Output Hash = 0x52406 // output + P Hash = 0xc01 // p + Param Hash = 0xc05 // param + Pattern Hash = 0x9b07 // pattern + Pauseonexit Hash = 0x57f0b // pauseonexit + Picture Hash = 0xe707 // picture + Ping Hash = 0x12a04 // ping + Placeholder Hash = 0x16b0b // placeholder + Plaintext Hash = 0x1f509 // plaintext + Poster Hash = 0x30e06 // poster + Pre Hash = 0x34f03 // pre + Preload Hash = 0x34f07 // preload + Profile Hash = 0x66707 // profile + Progress Hash = 0x5b108 // progress + Prompt Hash = 0x59606 // prompt + Public Hash = 0x4a406 // public + Q Hash = 0x8d01 // q + Radiogroup Hash = 0x30a // radiogroup + Rb Hash = 0x1d02 // rb + Readonly Hash = 0x38e08 // readonly + Rel Hash = 0x35003 // rel + Required Hash = 0x8b08 // required + Rev Hash = 0x29403 // rev + Reversed Hash = 0x29408 // reversed + Rows Hash = 0x6604 // rows + Rowspan Hash = 0x6607 // rowspan + Rp Hash = 0x28a02 // rp + Rt Hash = 0x1c102 // rt + Rtc Hash = 0x1c103 // rtc + Ruby Hash = 0xf604 // ruby + Rules Hash = 0x17505 // rules + S Hash = 0x3d01 // s + Samp Hash = 0x9804 // samp + Sandbox Hash = 0x16307 // sandbox + Scope Hash = 0x35a05 // scope + Scoped Hash = 0x35a06 // scoped + Script Hash = 0x31b06 // script + Scrolling Hash = 0x5db09 // scrolling + Seamless Hash = 0x3a808 // seamless + Section Hash = 0x17907 // section + Select Hash = 0x5f706 // select + Selected Hash = 0x5f708 // selected + Shape Hash = 0x23105 // shape + Size Hash = 0x24f04 // size + Sizes Hash = 0x24f05 // sizes + Small Hash = 0x23b05 // small + Sortable Hash = 0x25308 // sortable + Source Hash = 0x26606 // source + Spacer Hash = 0x37806 // spacer + Span Hash = 0x6904 // span + Spellcheck Hash = 0x3af0a // spellcheck + Src Hash = 0x44b03 // src + Srcdoc Hash = 0x44b06 // srcdoc + Srclang Hash = 0x49707 // srclang + Srcset Hash = 0x5b806 // srcset + Start Hash = 0x40c05 // start + Step Hash = 0x66404 // step + Strike Hash = 0x68406 // strike + Strong Hash = 0x68f06 // strong + Style Hash = 0x69505 // style + Sub Hash = 0x61e03 // sub + Summary Hash = 0x69a07 // summary + Sup Hash = 0x6a103 // sup + Svg Hash = 0x6a403 // svg + System Hash = 0x6a706 // system + Tabindex Hash = 0x4d908 // tabindex + Table Hash = 0x25605 // table + Target Hash = 0x2f906 // target + 
Tbody Hash = 0x3f05 // tbody + Td Hash = 0xaa02 // td + Template Hash = 0x6aa08 // template + Text Hash = 0x1fa04 // text + Textarea Hash = 0x38908 // textarea + Tfoot Hash = 0xf005 // tfoot + Th Hash = 0x18f02 // th + Thead Hash = 0x37105 // thead + Time Hash = 0x2ee04 // time + Title Hash = 0x14a05 // title + Tr Hash = 0x1fd02 // tr + Track Hash = 0x1fd05 // track + Translate Hash = 0x22109 // translate + Truespeed Hash = 0x27309 // truespeed + Tt Hash = 0x9d02 // tt + Type Hash = 0x11204 // type + Typemustmatch Hash = 0x1da0d // typemustmatch + U Hash = 0xb01 // u + Ul Hash = 0x5802 // ul + Undeterminate Hash = 0x250d // undeterminate + Usemap Hash = 0x14106 // usemap + Valign Hash = 0x1506 // valign + Value Hash = 0x10d05 // value + Valuetype Hash = 0x10d09 // valuetype + Var Hash = 0x32f03 // var + Video Hash = 0x6b205 // video + Visible Hash = 0x6bd07 // visible + Vlink Hash = 0x6c405 // vlink + Wbr Hash = 0x57a03 // wbr + Width Hash = 0x60405 // width + Wrap Hash = 0x59304 // wrap + Xmlns Hash = 0x15f05 // xmlns + Xmp Hash = 0x16903 // xmp +) + +// String returns the hash' name. +func (i Hash) String() string { + start := uint32(i >> 8) + n := uint32(i & 0xff) + if start+n > uint32(len(_Hash_text)) { + return "" + } + return _Hash_text[start : start+n] +} + +// ToHash returns the hash whose name is s. It returns zero if there is no +// such hash. It is case sensitive. +func ToHash(s []byte) Hash { + if len(s) == 0 || len(s) > _Hash_maxLen { + return 0 + } + h := uint32(_Hash_hash0) + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + goto NEXT + } + } + return i + } +NEXT: + if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + return 0 + } + } + return i + } + return 0 +} + +const _Hash_hash0 = 0x5334b67c +const _Hash_maxLen = 16 +const _Hash_text = "abbradiogrouparamainavalignobrbackgroundeterminateaccept-cha" + + "rsetbodyaccesskeygenabledefaultSelectedeferowspanoembedelabe" + + "longdescanvasideclarequiredetailsampatternoframesetdfnohrefl" + + "anguageacronymalignmarkbdialogalinkindirnamediagroupictureal" + + "tfooterubyasyncitemidisabledivaluetypeaudioncancelooptgroupi" + + "ngautocompleteautofocusemappletitleautoplayaxisindexmlnsandb" + + "oxmplaceholderulesectionblurbdoncanplaythrough1bgcolorbgsoun" + + "dlbigblinkblockquotebuttonabortclassidraggablegendcodetypemu" + + "stmatchallengecolgrouplaintextrackcolspannotationXmlcommandc" + + "ompactranslatecontrolshapecoordsmallowfullscreenoresizesorta" + + "blecrossoriginsourcefieldsetruespeedfigcaptionafterprintfigu" + + "reversedforeignObjectforeignobjectformactionbeforeprintforme" + + "nctypeformmethodformnovalidatetimeterformtargeth6heightmlhgr" + + "ouposterhiddenoscripthigh2http-equivariframeborderimageimgly" + + "ph3ismapreloaditemscopeditemtypemarqueematheaderspacermaxlen" + + "gth4mtextareadonlymultiplemutedoncloseamlesspellcheckedoncon" + + "textmenuoncuechangeondblclickondragendondragenterondragleave" + + "ondragoverondragstarticlearondropzonemptiedondurationchangeo" + + "nendedonerroronfocusrcdocodebasefontimeupdateonhashchangeoni" + + "nputoninvalidonkeydownloadonkeypressrclangonkeyupublicontent" + + "editableonloadeddatalistingonloadedmetadatabindexonloadstart" + + 
"onmessageonmousedownoshadefaultCheckedonmousemoveonmouseoutp" + + "utonmouseoveronmouseuponmousewheelonofflinertononlineonpageh" + + "idefaultMutedonpageshowbronpauseonexitempropenowrapromptonpl" + + "ayingonpopstateonprogressrcsetonratechangeonresetonresizeons" + + "crollingonseekedonseekingonselectedonshowidth5onstalledonsto" + + "rageonsubmitemrefacenteronsuspendonunloadonvolumechangeonwai" + + "tingoptimumanifesteprofileoptionbeforeunloaddresstrikeytypes" + + "trongstylesummarysupsvgsystemplatevideonclickvisiblevlink" + +var _Hash_table = [1 << 9]Hash{ + 0x0: 0x2cb0b, // formenctype + 0x1: 0x2d60a, // formmethod + 0x2: 0x3c80b, // oncuechange + 0x3: 0x3dd06, // ondrag + 0x6: 0x68406, // strike + 0x7: 0x6b205, // video + 0x9: 0x4a907, // content + 0xa: 0x4e07, // enabled + 0xb: 0x59106, // nowrap + 0xc: 0xd304, // link + 0xe: 0x28a02, // rp + 0xf: 0x2840c, // onafterprint + 0x10: 0x14506, // applet + 0x11: 0xf005, // tfoot + 0x12: 0x5040e, // defaultChecked + 0x13: 0x3330b, // frameborder + 0x14: 0xf106, // footer + 0x15: 0x5f708, // selected + 0x16: 0x49707, // srclang + 0x18: 0x52a0b, // onmouseover + 0x19: 0x1d604, // code + 0x1b: 0x47809, // oninvalid + 0x1c: 0x62804, // face + 0x1e: 0x3bd0b, // contextmenu + 0x1f: 0xa308, // frameset + 0x21: 0x5650c, // defaultMuted + 0x22: 0x19905, // color + 0x23: 0x59c06, // onplay + 0x25: 0x2f005, // meter + 0x26: 0x61309, // onstorage + 0x27: 0x38e08, // readonly + 0x29: 0x66707, // profile + 0x2a: 0x8607, // declare + 0x2b: 0xb01, // u + 0x2c: 0x31908, // noscript + 0x2d: 0x65e08, // manifest + 0x2e: 0x1b806, // button + 0x2f: 0x2ea08, // datetime + 0x30: 0x47305, // input + 0x31: 0x5407, // default + 0x32: 0x1d608, // codetype + 0x33: 0x2a90d, // foreignobject + 0x34: 0x36807, // marquee + 0x36: 0x19707, // bgcolor + 0x37: 0x19502, // h1 + 0x39: 0x1e0a, // background + 0x3b: 0x2f50a, // formtarget + 0x41: 0x2f906, // target + 0x43: 0x23b05, // small + 0x44: 0x45008, // codebase + 0x45: 0x55005, // inert + 0x47: 0x38805, // mtext + 0x48: 0x6607, // rowspan + 0x49: 0x2be0d, // onbeforeprint + 0x4a: 0x55508, // ononline + 0x4c: 0x29006, // figure + 0x4d: 0x4cb10, // onloadedmetadata + 0x4e: 0xbb07, // acronym + 0x50: 0x39608, // multiple + 0x51: 0x320e, // accept-charset + 0x52: 0x24f05, // sizes + 0x53: 0x29c0d, // foreignObject + 0x55: 0x2e40a, // novalidate + 0x56: 0x55d0a, // onpagehide + 0x57: 0x2e302, // mn + 0x58: 0x38602, // h4 + 0x5a: 0x1c102, // rt + 0x5b: 0xd205, // alink + 0x5e: 0x59606, // prompt + 0x5f: 0x17102, // ol + 0x61: 0x5d108, // onresize + 0x64: 0x69a07, // summary + 0x65: 0x5a50a, // onpopstate + 0x66: 0x38d04, // area + 0x68: 0x64f09, // onwaiting + 0x6b: 0xdc04, // name + 0x6c: 0x23606, // coords + 0x6d: 0x34303, // img + 0x6e: 0x66404, // step + 0x6f: 0x5ec09, // onseeking + 0x70: 0x32104, // high + 0x71: 0x49e07, // onkeyup + 0x72: 0x5f706, // select + 0x73: 0x1fd05, // track + 0x74: 0x34b05, // ismap + 0x76: 0x47107, // oninput + 0x77: 0x8d01, // q + 0x78: 0x48109, // onkeydown + 0x79: 0x33e05, // image + 0x7a: 0x2b604, // form + 0x7b: 0x60a09, // onstalled + 0x7c: 0xe707, // picture + 0x7d: 0x42f08, // onchange + 0x7e: 0x1a905, // blink + 0x7f: 0xee03, // alt + 0x80: 0xfa05, // async + 0x82: 0x1702, // li + 0x84: 0x2c02, // mi + 0x85: 0xff06, // itemid + 0x86: 0x11605, // audio + 0x87: 0x31b06, // script + 0x8b: 0x44b06, // srcdoc + 0x8e: 0xc704, // mark + 0x8f: 0x18403, // bdo + 0x91: 0x5120b, // onmousemove + 0x93: 0x3c404, // menu + 0x94: 0x45804, // font + 0x95: 0x14f08, // autoplay + 0x96: 0x6c405, // vlink + 
0x98: 0x6e02, // em + 0x9a: 0x5b806, // srcset + 0x9b: 0x1ee08, // colgroup + 0x9c: 0x58e04, // open + 0x9d: 0x1d006, // legend + 0x9e: 0x4e10b, // onloadstart + 0xa2: 0x22109, // translate + 0xa3: 0x6e05, // embed + 0xa4: 0x1c305, // class + 0xa6: 0x6aa08, // template + 0xa7: 0x37206, // header + 0xa9: 0x4b806, // onload + 0xaa: 0x37105, // thead + 0xab: 0x5db09, // scrolling + 0xac: 0xc05, // param + 0xae: 0x9b07, // pattern + 0xaf: 0x9207, // details + 0xb1: 0x4a406, // public + 0xb3: 0x4f50b, // onmousedown + 0xb4: 0x14403, // map + 0xb6: 0x25b0b, // crossorigin + 0xb7: 0x1506, // valign + 0xb9: 0x1bc07, // onabort + 0xba: 0x66e06, // option + 0xbb: 0x26606, // source + 0xbc: 0x6205, // defer + 0xbd: 0x1e509, // challenge + 0xbf: 0x10d05, // value + 0xc0: 0x23d0f, // allowfullscreen + 0xc1: 0xca03, // kbd + 0xc2: 0x2070d, // annotationXml + 0xc3: 0x5be0c, // onratechange + 0xc4: 0x4f702, // mo + 0xc6: 0x3af0a, // spellcheck + 0xc7: 0x2c03, // min + 0xc8: 0x4b80c, // onloadeddata + 0xc9: 0x41205, // clear + 0xca: 0x42710, // ondurationchange + 0xcb: 0x1a04, // nobr + 0xcd: 0x27309, // truespeed + 0xcf: 0x30906, // hgroup + 0xd0: 0x40c05, // start + 0xd3: 0x41908, // dropzone + 0xd5: 0x7405, // label + 0xd8: 0xde0a, // mediagroup + 0xd9: 0x17e06, // onblur + 0xdb: 0x27f07, // caption + 0xdd: 0x7c04, // desc + 0xde: 0x15f05, // xmlns + 0xdf: 0x30106, // height + 0xe0: 0x21407, // command + 0xe2: 0x57f0b, // pauseonexit + 0xe3: 0x68f06, // strong + 0xe4: 0x43e07, // onerror + 0xe5: 0x61c08, // onsubmit + 0xe6: 0xb308, // language + 0xe7: 0x48608, // download + 0xe9: 0x53509, // onmouseup + 0xec: 0x2cf07, // enctype + 0xed: 0x5f508, // onselect + 0xee: 0x2b006, // object + 0xef: 0x1f509, // plaintext + 0xf0: 0x3d30a, // ondblclick + 0xf1: 0x18610, // oncanplaythrough + 0xf2: 0xd903, // dir + 0xf3: 0x38908, // textarea + 0xf4: 0x12a04, // ping + 0xf5: 0x2da06, // method + 0xf6: 0x22a08, // controls + 0xf7: 0x37806, // spacer + 0xf8: 0x6a403, // svg + 0xf9: 0x30504, // html + 0xfa: 0x3d01, // s + 0xfc: 0xcc06, // dialog + 0xfe: 0x1da0d, // typemustmatch + 0xff: 0x3b407, // checked + 0x101: 0x30e06, // poster + 0x102: 0x3260a, // http-equiv + 0x103: 0x44b03, // src + 0x104: 0x10408, // disabled + 0x105: 0x37207, // headers + 0x106: 0x5af0a, // onprogress + 0x107: 0x26c08, // fieldset + 0x108: 0x32f03, // var + 0x10a: 0xa305, // frame + 0x10b: 0x36008, // itemtype + 0x10c: 0x3fc0a, // ondragover + 0x10d: 0x13a09, // autofocus + 0x10f: 0x601, // i + 0x110: 0x35902, // ms + 0x111: 0x45404, // base + 0x113: 0x35a05, // scope + 0x114: 0x3206, // accept + 0x115: 0x58808, // itemprop + 0x117: 0xfe04, // cite + 0x118: 0x3907, // charset + 0x119: 0x14a05, // title + 0x11a: 0x68807, // keytype + 0x11b: 0x1fa04, // text + 0x11c: 0x65807, // optimum + 0x11e: 0x37204, // head + 0x121: 0x21b07, // compact + 0x123: 0x63009, // onsuspend + 0x124: 0x4c404, // list + 0x125: 0x4590c, // ontimeupdate + 0x126: 0x62a06, // center + 0x127: 0x31406, // hidden + 0x129: 0x35609, // itemscope + 0x12c: 0x1a402, // dl + 0x12d: 0x17907, // section + 0x12e: 0x11a08, // oncancel + 0x12f: 0x6b607, // onclick + 0x130: 0xde05, // media + 0x131: 0x52406, // output + 0x132: 0x4c008, // datalist + 0x133: 0x53e0c, // onmousewheel + 0x134: 0x45408, // basefont + 0x135: 0x37e09, // maxlength + 0x136: 0x6bd07, // visible + 0x137: 0x2e00e, // formnovalidate + 0x139: 0x16903, // xmp + 0x13a: 0x101, // b + 0x13b: 0x5710a, // onpageshow + 0x13c: 0xf604, // ruby + 0x13d: 0x16b0b, // placeholder + 0x13e: 0x4c407, // listing + 0x140: 
0x26403, // ins + 0x141: 0x62207, // itemref + 0x144: 0x540f, // defaultSelected + 0x146: 0x3f10b, // ondragleave + 0x147: 0x1ae0a, // blockquote + 0x148: 0x59304, // wrap + 0x14a: 0x1a603, // big + 0x14b: 0x35003, // rel + 0x14c: 0x41706, // ondrop + 0x14e: 0x6a706, // system + 0x14f: 0x30a, // radiogroup + 0x150: 0x25605, // table + 0x152: 0x57a03, // wbr + 0x153: 0x3bb0d, // oncontextmenu + 0x155: 0x250d, // undeterminate + 0x157: 0x20204, // cols + 0x158: 0x16307, // sandbox + 0x159: 0x1303, // nav + 0x15a: 0x37e03, // max + 0x15b: 0x7808, // longdesc + 0x15c: 0x60405, // width + 0x15d: 0x34902, // h3 + 0x15e: 0x19e07, // bgsound + 0x161: 0x10d09, // valuetype + 0x162: 0x69505, // style + 0x164: 0x3f05, // tbody + 0x165: 0x40e07, // article + 0x169: 0xcb03, // bdi + 0x16a: 0x67e07, // address + 0x16b: 0x23105, // shape + 0x16c: 0x2ba06, // action + 0x16e: 0x1fd02, // tr + 0x16f: 0xaa02, // td + 0x170: 0x3dd09, // ondragend + 0x171: 0x5802, // ul + 0x172: 0x33806, // border + 0x174: 0x4a06, // keygen + 0x175: 0x4004, // body + 0x177: 0x1c909, // draggable + 0x178: 0x2b60a, // formaction + 0x17b: 0x34406, // mglyph + 0x17d: 0x1d02, // rb + 0x17e: 0x2ff02, // h6 + 0x17f: 0x41e09, // onemptied + 0x180: 0x5ca07, // onreset + 0x181: 0x1004, // main + 0x182: 0x12104, // loop + 0x183: 0x48e0a, // onkeypress + 0x184: 0x9d02, // tt + 0x186: 0x20207, // colspan + 0x188: 0x36f04, // math + 0x189: 0x1605, // align + 0x18a: 0xa108, // noframes + 0x18b: 0xaf02, // hr + 0x18c: 0xc10a, // malignmark + 0x18e: 0x23f03, // low + 0x18f: 0x8502, // id + 0x190: 0x6604, // rows + 0x191: 0x29403, // rev + 0x192: 0x63908, // onunload + 0x193: 0x39e05, // muted + 0x194: 0x35a06, // scoped + 0x195: 0x31602, // dd + 0x196: 0x60602, // dt + 0x197: 0x6720e, // onbeforeunload + 0x199: 0x2070a, // annotation + 0x19a: 0x29408, // reversed + 0x19c: 0x11204, // type + 0x19d: 0x57d07, // onpause + 0x19e: 0xd604, // kind + 0x19f: 0x4c004, // data + 0x1a0: 0x4ff07, // noshade + 0x1a3: 0x17505, // rules + 0x1a4: 0x12308, // optgroup + 0x1a5: 0x202, // br + 0x1a7: 0x1, // a + 0x1a8: 0x51d0a, // onmouseout + 0x1aa: 0x54a09, // onoffline + 0x1ab: 0x6410e, // onvolumechange + 0x1ae: 0x61e03, // sub + 0x1b3: 0x29c03, // for + 0x1b5: 0x8b08, // required + 0x1b6: 0x5b108, // progress + 0x1b7: 0x14106, // usemap + 0x1b8: 0x7f06, // canvas + 0x1b9: 0x4a804, // icon + 0x1bb: 0x1c103, // rtc + 0x1bc: 0x8305, // aside + 0x1bd: 0x2ee04, // time + 0x1be: 0x4060b, // ondragstart + 0x1c0: 0x27c0a, // figcaption + 0x1c1: 0xaf04, // href + 0x1c2: 0x33206, // iframe + 0x1c3: 0x18609, // oncanplay + 0x1c4: 0x6904, // span + 0x1c5: 0x34f03, // pre + 0x1c6: 0x6c07, // noembed + 0x1c8: 0x5e408, // onseeked + 0x1c9: 0x4d304, // meta + 0x1ca: 0x32402, // h2 + 0x1cb: 0x3a808, // seamless + 0x1cc: 0xab03, // dfn + 0x1cd: 0x15704, // axis + 0x1cf: 0x3e60b, // ondragenter + 0x1d0: 0x18f02, // th + 0x1d1: 0x4650c, // onhashchange + 0x1d2: 0xb304, // lang + 0x1d3: 0x44507, // onfocus + 0x1d5: 0x24f04, // size + 0x1d8: 0x12e0c, // autocomplete + 0x1d9: 0xaf08, // hreflang + 0x1da: 0x9804, // samp + 0x1de: 0x19903, // col + 0x1df: 0x10b03, // div + 0x1e0: 0x25308, // sortable + 0x1e1: 0x7203, // del + 0x1e3: 0x3a307, // onclose + 0x1e6: 0xd907, // dirname + 0x1e8: 0x1c307, // classid + 0x1e9: 0x34f07, // preload + 0x1ea: 0x4d908, // tabindex + 0x1eb: 0x60802, // h5 + 0x1ec: 0x5d908, // onscroll + 0x1ed: 0x4a90f, // contenteditable + 0x1ee: 0x4ec09, // onmessage + 0x1ef: 0x4, // abbr + 0x1f0: 0x15907, // isindex + 0x1f1: 0x6a103, // sup + 0x1f3: 0x24b08, // 
noresize + 0x1f5: 0x59c09, // onplaying + 0x1f6: 0x4409, // accesskey + 0x1fa: 0xc01, // p + 0x1fb: 0x43707, // onended + 0x1fc: 0x5ff06, // onshow + 0x1fe: 0xad06, // nohref +} diff --git a/vendor/github.com/tdewolff/parse/html/hash_test.go b/vendor/github.com/tdewolff/parse/html/hash_test.go new file mode 100644 index 0000000..c905ba3 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/hash_test.go @@ -0,0 +1,58 @@ +package html // import "github.com/tdewolff/parse/html" + +import ( + "bytes" + "testing" + + "github.com/tdewolff/test" +) + +func TestHashTable(t *testing.T) { + test.T(t, ToHash([]byte("address")), Address, "'address' must resolve to Address") + test.T(t, Address.String(), "address") + test.T(t, Accept_Charset.String(), "accept-charset") + test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero") + test.T(t, Hash(0xffffff).String(), "") + test.T(t, ToHash([]byte("iter")), Hash(0), "'iter' must resolve to zero") + test.T(t, ToHash([]byte("test")), Hash(0), "'test' must resolve to zero") +} + +//////////////////////////////////////////////////////////////// + +var result int + +// naive scenario +func BenchmarkCompareBytes(b *testing.B) { + var r int + val := []byte("span") + for n := 0; n < b.N; n++ { + if bytes.Equal(val, []byte("span")) { + r++ + } + } + result = r +} + +// using-atoms scenario +func BenchmarkFindAndCompareAtom(b *testing.B) { + var r int + val := []byte("span") + for n := 0; n < b.N; n++ { + if ToHash(val) == Span { + r++ + } + } + result = r +} + +// using-atoms worst-case scenario +func BenchmarkFindAtomCompareBytes(b *testing.B) { + var r int + val := []byte("zzzz") + for n := 0; n < b.N; n++ { + if h := ToHash(val); h == 0 && bytes.Equal(val, []byte("zzzz")) { + r++ + } + } + result = r +} diff --git a/vendor/github.com/tdewolff/parse/html/lex.go b/vendor/github.com/tdewolff/parse/html/lex.go new file mode 100644 index 0000000..c81490a --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/lex.go @@ -0,0 +1,485 @@ +// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html. +package html // import "github.com/tdewolff/parse/html" + +import ( + "io" + "strconv" + + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/buffer" +) + +// TokenType determines the type of token, eg. a number or a semicolon. +type TokenType uint32 + +// TokenType values. +const ( + ErrorToken TokenType = iota // extra token when errors occur + CommentToken + DoctypeToken + StartTagToken + StartTagCloseToken + StartTagVoidToken + EndTagToken + AttributeToken + TextToken + SvgToken + MathToken +) + +// String returns the string representation of a TokenType. +func (tt TokenType) String() string { + switch tt { + case ErrorToken: + return "Error" + case CommentToken: + return "Comment" + case DoctypeToken: + return "Doctype" + case StartTagToken: + return "StartTag" + case StartTagCloseToken: + return "StartTagClose" + case StartTagVoidToken: + return "StartTagVoid" + case EndTagToken: + return "EndTag" + case AttributeToken: + return "Attribute" + case TextToken: + return "Text" + case SvgToken: + return "Svg" + case MathToken: + return "Math" + } + return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Lexer is the state for the lexer. +type Lexer struct { + r *buffer.Lexer + err error + + rawTag Hash + inTag bool + + text []byte + attrVal []byte +} + +// NewLexer returns a new Lexer for a given io.Reader. 
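+// Call Next to retrieve tokens; when Next returns ErrorToken, Err reports io.EOF or the underlying read error.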
+func NewLexer(r io.Reader) *Lexer { + return &Lexer{ + r: buffer.NewLexer(r), + } +} + +// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned. +func (l *Lexer) Err() error { + if err := l.r.Err(); err != nil { + return err + } + return l.err +} + +// Restore restores the NULL byte at the end of the buffer. +func (l *Lexer) Restore() { + l.r.Restore() +} + +// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. +func (l *Lexer) Next() (TokenType, []byte) { + l.text = nil + var c byte + if l.inTag { + l.attrVal = nil + for { // before attribute name state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + continue + } + break + } + if c == 0 { + l.err = parse.NewErrorLexer("unexpected null character", l.r) + return ErrorToken, nil + } else if c != '>' && (c != '/' || l.r.Peek(1) != '>') { + return AttributeToken, l.shiftAttribute() + } + start := l.r.Pos() + l.inTag = false + if c == '/' { + l.r.Move(2) + l.text = l.r.Lexeme()[start:] + return StartTagVoidToken, l.r.Shift() + } + l.r.Move(1) + l.text = l.r.Lexeme()[start:] + return StartTagCloseToken, l.r.Shift() + } + + if l.rawTag != 0 { + if rawText := l.shiftRawText(); len(rawText) > 0 { + l.rawTag = 0 + return TextToken, rawText + } + l.rawTag = 0 + } + + for { + c = l.r.Peek(0) + if c == '<' { + c = l.r.Peek(1) + if l.r.Pos() > 0 { + if c == '/' && l.r.Peek(2) != 0 || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' { + return TextToken, l.r.Shift() + } + } else if c == '/' && l.r.Peek(2) != 0 { + l.r.Move(2) + if c = l.r.Peek(0); c != '>' && !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { + return CommentToken, l.shiftBogusComment() + } + return EndTagToken, l.shiftEndTag() + } else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { + l.r.Move(1) + l.inTag = true + return l.shiftStartTag() + } else if c == '!' { + l.r.Move(2) + return l.readMarkup() + } else if c == '?' { + l.r.Move(1) + return CommentToken, l.shiftBogusComment() + } + } else if c == 0 { + if l.r.Pos() > 0 { + return TextToken, l.r.Shift() + } + l.err = parse.NewErrorLexer("unexpected null character", l.r) + return ErrorToken, nil + } + l.r.Move(1) + } +} + +// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters. +func (l *Lexer) Text() []byte { + return l.text +} + +// AttrVal returns the attribute value when an AttributeToken was returned from Next. +func (l *Lexer) AttrVal() []byte { + return l.attrVal +} + +//////////////////////////////////////////////////////////////// + +// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html + +func (l *Lexer) shiftRawText() []byte { + if l.rawTag == Plaintext { + for { + if l.r.Peek(0) == 0 { + return l.r.Shift() + } + l.r.Move(1) + } + } else { // RCDATA, RAWTEXT and SCRIPT + for { + c := l.r.Peek(0) + if c == '<' { + if l.r.Peek(1) == '/' { + mark := l.r.Pos() + l.r.Move(2) + for { + if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { + break + } + l.r.Move(1) + } + if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice + l.r.Rewind(mark) + return l.r.Shift() + } + } else if l.rawTag == Script && l.r.Peek(1) == '!' 
&& l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { + l.r.Move(4) + inScript := false + for { + c := l.r.Peek(0) + if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + l.r.Move(3) + break + } else if c == '<' { + isEnd := l.r.Peek(1) == '/' + if isEnd { + l.r.Move(2) + } else { + l.r.Move(1) + } + mark := l.r.Pos() + for { + if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { + break + } + l.r.Move(1) + } + if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice + if !isEnd { + inScript = true + } else { + if !inScript { + l.r.Rewind(mark - 2) + return l.r.Shift() + } + inScript = false + } + } + } else if c == 0 { + return l.r.Shift() + } + l.r.Move(1) + } + } else { + l.r.Move(1) + } + } else if c == 0 { + return l.r.Shift() + } else { + l.r.Move(1) + } + } + } +} + +func (l *Lexer) readMarkup() (TokenType, []byte) { + if l.at('-', '-') { + l.r.Move(2) + for { + if l.r.Peek(0) == 0 { + return CommentToken, l.r.Shift() + } else if l.at('-', '-', '>') { + l.text = l.r.Lexeme()[4:] + l.r.Move(3) + return CommentToken, l.r.Shift() + } else if l.at('-', '-', '!', '>') { + l.text = l.r.Lexeme()[4:] + l.r.Move(4) + return CommentToken, l.r.Shift() + } + l.r.Move(1) + } + } else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') { + l.r.Move(7) + for { + if l.r.Peek(0) == 0 { + return TextToken, l.r.Shift() + } else if l.at(']', ']', '>') { + l.r.Move(3) + return TextToken, l.r.Shift() + } + l.r.Move(1) + } + } else { + if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') { + l.r.Move(7) + if l.r.Peek(0) == ' ' { + l.r.Move(1) + } + for { + if c := l.r.Peek(0); c == '>' || c == 0 { + l.text = l.r.Lexeme()[9:] + if c == '>' { + l.r.Move(1) + } + return DoctypeToken, l.r.Shift() + } + l.r.Move(1) + } + } + } + return CommentToken, l.shiftBogusComment() +} + +func (l *Lexer) shiftBogusComment() []byte { + for { + c := l.r.Peek(0) + if c == '>' { + l.text = l.r.Lexeme()[2:] + l.r.Move(1) + return l.r.Shift() + } else if c == 0 { + l.text = l.r.Lexeme()[2:] + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftStartTag() (TokenType, []byte) { + for { + if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 { + break + } + l.r.Move(1) + } + l.text = parse.ToLower(l.r.Lexeme()[1:]) + if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math { + if h == Svg { + l.inTag = false + return SvgToken, l.shiftXml(h) + } else if h == Math { + l.inTag = false + return MathToken, l.shiftXml(h) + } + l.rawTag = h + } + return StartTagToken, l.r.Shift() +} + +func (l *Lexer) shiftAttribute() []byte { + nameStart := l.r.Pos() + var c byte + for { // attribute name state + if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 { + break + } + l.r.Move(1) + } + nameEnd := l.r.Pos() + for { // after attribute name state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + continue + } + break + } + if c == '=' { + l.r.Move(1) + for { // before attribute value state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + continue + } + break + } + attrPos := l.r.Pos() + delim := c + if delim == '"' || delim == '\'' { // attribute value 
single- and double-quoted state + l.r.Move(1) + for { + c := l.r.Peek(0) + if c == delim { + l.r.Move(1) + break + } else if c == 0 { + break + } + l.r.Move(1) + } + } else { // attribute value unquoted state + for { + if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 { + break + } + l.r.Move(1) + } + } + l.attrVal = l.r.Lexeme()[attrPos:] + } else { + l.r.Rewind(nameEnd) + l.attrVal = nil + } + l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd]) + return l.r.Shift() +} + +func (l *Lexer) shiftEndTag() []byte { + for { + c := l.r.Peek(0) + if c == '>' { + l.text = l.r.Lexeme()[2:] + l.r.Move(1) + break + } else if c == 0 { + l.text = l.r.Lexeme()[2:] + break + } + l.r.Move(1) + } + + end := len(l.text) + for end > 0 { + if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' { + end-- + continue + } + break + } + l.text = l.text[:end] + return parse.ToLower(l.r.Shift()) +} + +func (l *Lexer) shiftXml(rawTag Hash) []byte { + inQuote := false + for { + c := l.r.Peek(0) + if c == '"' { + inQuote = !inQuote + l.r.Move(1) + } else if c == '<' && !inQuote { + if l.r.Peek(1) == '/' { + mark := l.r.Pos() + l.r.Move(2) + for { + if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { + break + } + l.r.Move(1) + } + if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice + break + } + } else { + l.r.Move(1) + } + } else if c == 0 { + return l.r.Shift() + } + l.r.Move(1) + } + + for { + c := l.r.Peek(0) + if c == '>' { + l.r.Move(1) + break + } else if c == 0 { + break + } + l.r.Move(1) + } + return l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +func (l *Lexer) at(b ...byte) bool { + for i, c := range b { + if l.r.Peek(i) != c { + return false + } + } + return true +} + +func (l *Lexer) atCaseInsensitive(b ...byte) bool { + for i, c := range b { + if l.r.Peek(i) != c && (l.r.Peek(i)+('a'-'A')) != c { + return false + } + } + return true +} diff --git a/vendor/github.com/tdewolff/parse/html/lex_test.go b/vendor/github.com/tdewolff/parse/html/lex_test.go new file mode 100644 index 0000000..5f4ca0b --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/lex_test.go @@ -0,0 +1,262 @@ +package html // import "github.com/tdewolff/parse/html" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t *testing.T) { + var tokenTests = []struct { + html string + expected []TokenType + }{ + {"<html></html>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, + {"<img/>", TTs{StartTagToken, StartTagVoidToken}}, + {"<!-- comment -->", TTs{CommentToken}}, + {"<!-- comment --!>", TTs{CommentToken}}, + {"<p>text</p>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<input type='button'/>", TTs{StartTagToken, AttributeToken, StartTagVoidToken}}, + {"<input type='button' value=''/>", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagVoidToken}}, + {"<input type='=/>' \r\n\t\f value=\"'\" name=x checked />", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, AttributeToken, StartTagVoidToken}}, + {"<!doctype>", TTs{DoctypeToken}}, + {"<!doctype html>", TTs{DoctypeToken}}, + {"<?bogus>", TTs{CommentToken}}, + {"</0bogus>", TTs{CommentToken}}, + {"<!bogus>", TTs{CommentToken}}, + {"< ", TTs{TextToken}}, + {"</", TTs{TextToken}}, + + // raw tags + 
{"<title><p></p></title>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<TITLE><p></p></TITLE>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<plaintext></plaintext>", TTs{StartTagToken, StartTagCloseToken, TextToken}}, + {"<script></script>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, + {"<script>var x='</script>';</script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}}, + {"<script><!--var x='</script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}}, + {"<script><!--var x='<script></script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<script><!--var x='<script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<![CDATA[ test ]]>", TTs{TextToken}}, + {"<svg>text</svg>", TTs{SvgToken}}, + {"<math>text</math>", TTs{MathToken}}, + {`<svg>text<x a="</svg>"></x></svg>`, TTs{SvgToken}}, + {"<a><svg>text</svg></a>", TTs{StartTagToken, StartTagCloseToken, SvgToken, EndTagToken}}, + + // early endings + {"<!-- comment", TTs{CommentToken}}, + {"<? bogus comment", TTs{CommentToken}}, + {"<foo", TTs{StartTagToken}}, + {"</foo", TTs{EndTagToken}}, + {"<foo x", TTs{StartTagToken, AttributeToken}}, + {"<foo x=", TTs{StartTagToken, AttributeToken}}, + {"<foo x='", TTs{StartTagToken, AttributeToken}}, + {"<foo x=''", TTs{StartTagToken, AttributeToken}}, + {"<!DOCTYPE note SYSTEM", TTs{DoctypeToken}}, + {"<![CDATA[ test", TTs{TextToken}}, + {"<script>", TTs{StartTagToken, StartTagCloseToken}}, + {"<script><!--", TTs{StartTagToken, StartTagCloseToken, TextToken}}, + {"<script><!--var x='<script></script>';-->", TTs{StartTagToken, StartTagCloseToken, TextToken}}, + + // go-fuzz + {"</>", TTs{EndTagToken}}, + } + for _, tt := range tokenTests { + t.Run(tt.html, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.html)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, token, tt.expected[i], "token types must match") + } + i++ + } + }) + } + + test.T(t, TokenType(100).String(), "Invalid(100)") +} + +func TestTags(t *testing.T) { + var tagTests = []struct { + html string + expected string + }{ + {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"}, + {"<foo?bar/qux>", "foo?bar/qux"}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""}, + {"</foo >", "foo"}, + + // early endings + {"<foo ", "foo"}, + } + for _, tt := range tagTests { + t.Run(tt.html, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.html)) + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.Fail(t, "when error occurred we must be at the end") + break + } else if token == StartTagToken || token == EndTagToken || token == DoctypeToken { + test.String(t, string(l.Text()), tt.expected) + break + } + } + }) + } +} + +func TestAttributes(t *testing.T) { + var attributeTests = []struct { + attr string + expected []string + }{ + {"<foo a=\"b\" />", []string{"a", "\"b\""}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}}, + {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a \n\t\r b \""}}, + {"<foo a/>", []string{"a", 
""}}, + {"<foo /=/>", []string{"/", "/"}}, + + // early endings + {"<foo x", []string{"x", ""}}, + {"<foo x=", []string{"x", ""}}, + {"<foo x='", []string{"x", "'"}}, + } + for _, tt := range attributeTests { + t.Run(tt.attr, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.attr)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == AttributeToken { + test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected)) + if i+1 < len(tt.expected) { + test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match") + test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match") + i += 2 + } + } + } + }) + } +} + +func TestErrors(t *testing.T) { + var errorTests = []struct { + html string + col int + }{ + {"a\x00b", 2}, + } + for _, tt := range errorTests { + t.Run(tt.html, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.html)) + for { + token, _ := l.Next() + if token == ErrorToken { + if tt.col == 0 { + test.T(t, l.Err(), io.EOF) + } else if perr, ok := l.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", l.Err()) + } + break + } + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +var J int +var ss = [][]byte{ + []byte(" style"), + []byte("style"), + []byte(" \r\n\tstyle"), + []byte(" style"), + []byte(" x"), + []byte("x"), +} + +func BenchmarkWhitespace1(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range ss { + j := 0 + for { + if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + j++ + } else { + break + } + } + J += j + } + } +} + +func BenchmarkWhitespace2(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range ss { + j := 0 + for { + if c := s[j]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + j++ + continue + } + break + } + J += j + } + } +} + +func BenchmarkWhitespace3(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range ss { + j := 0 + for { + if c := s[j]; c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f' { + break + } + j++ + } + J += j + } + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewLexer() { + l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>")) + out := "" + for { + tt, data := l.Next() + if tt == ErrorToken { + break + } + out += string(data) + } + fmt.Println(out) + // Output: <span class='user'>John Doe</span> +} diff --git a/vendor/github.com/tdewolff/parse/html/util.go b/vendor/github.com/tdewolff/parse/html/util.go new file mode 100644 index 0000000..c8c3aab --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/util.go @@ -0,0 +1,129 @@ +package html // import "github.com/tdewolff/parse/html" + +import "github.com/tdewolff/parse" + +var ( + singleQuoteEntityBytes = []byte("'") + doubleQuoteEntityBytes = []byte(""") +) + +var charTable = [256]bool{ + // ASCII + false, false, false, false, false, false, false, false, + false, true, true, true, true, true, false, false, // tab, new line, vertical tab, form feed, carriage return + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + true, false, true, false, false, false, true, true, // space, ", &, ' + false, false, false, false, false, false, false, false, + false, 
false, false, false, false, false, false, false, + false, false, false, false, true, true, true, false, // <, =, > + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + true, false, false, false, false, false, false, false, // ` + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + // non-ASCII + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, +} + +// EscapeAttrVal returns the escaped attribute value bytes without quotes. +func EscapeAttrVal(buf *[]byte, orig, b []byte) []byte { + singles := 0 + doubles := 0 + unquoted := true + entities := false + for i, c := range b { + if charTable[c] { + if c == '&' { + entities = true + if quote, n := parse.QuoteEntity(b[i:]); n > 0 { + if quote == '"' { + unquoted = false + doubles++ + } else { + unquoted = false + singles++ + } + } + } else { + unquoted = false + if c == '"' { + doubles++ + } else if c == '\'' { + singles++ + } + } + } + } + if unquoted { + return b + } else if !entities && len(orig) == len(b)+2 && (singles == 0 && orig[0] == '\'' || doubles == 0 && orig[0] == '"') { + return orig + } + + n := len(b) + 2 + var quote byte + var escapedQuote []byte + if doubles > singles { + n += singles * 4 + quote = '\'' + escapedQuote = singleQuoteEntityBytes + } else { + n += doubles * 4 + quote = '"' + escapedQuote = doubleQuoteEntityBytes + } + if n > cap(*buf) { + *buf = make([]byte, 0, n) // maximum size, not actual size + } + t := (*buf)[:n] // maximum size, not actual size + t[0] = quote + j := 1 + start := 0 + for i, c := range b { + if c == '&' { + if entityQuote, n := parse.QuoteEntity(b[i:]); n > 0 { + j += copy(t[j:], b[start:i]) + if entityQuote != quote { + t[j] = entityQuote + j++ + } else { + j += copy(t[j:], escapedQuote) + } + start = i + n + } + } else if c == quote { + j += copy(t[j:], b[start:i]) + j += copy(t[j:], escapedQuote) + start = i + 1 + } + } + j += copy(t[j:], b[start:]) + t[j] = quote + return t[:j+1] +} diff --git a/vendor/github.com/tdewolff/parse/html/util_test.go b/vendor/github.com/tdewolff/parse/html/util_test.go new file mode 100644 index 0000000..3722a08 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/html/util_test.go @@ -0,0 +1,43 @@ +package html // import "github.com/tdewolff/parse/html" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestEscapeAttrVal(t *testing.T) { + var escapeAttrValTests = []struct { + 
attrVal  string
+		expected string
+	}{
+		{"xyz", "xyz"},
+		{"", ""},
+		{"x&amp;z", "x&amp;z"},
+		{"x/z", "x/z"},
+		{"x'z", "\"x'z\""},
+		{"x\"z", "'x\"z'"},
+		{"'x\"z'", "'x\"z'"},
+		{"'x&#39;\"&#39;z'", "\"x'&#34;'z\""},
+		{"\"x&#34;'&#34;z\"", "'x\"&#39;\"z'"},
+		{"\"x'z\"", "\"x'z\""},
+		{"'x&#34;z'", "'x\"z'"},
+		{"'x\">'", "'x\">'"},
+		{"You're encouraged to log in; however, it's not mandatory. [o]", "\"You're encouraged to log in; however, it's not mandatory. [o]\""},
+		{"a'b=\"\"", "'a&#39;b=\"\"'"},
+		{"x<z", "\"x<z\""},
+		{"'x\"&#39;\"z'", "'x\"&#39;\"z'"},
+	}
+	var buf []byte
+	for _, tt := range escapeAttrValTests {
+		t.Run(tt.attrVal, func(t *testing.T) {
+			b := []byte(tt.attrVal)
+			orig := b
+			if len(b) > 1 && (b[0] == '"' || b[0] == '\'') && b[0] == b[len(b)-1] {
+				b = b[1 : len(b)-1]
+			}
+			val := EscapeAttrVal(&buf, orig, []byte(b))
+			test.String(t, string(val), tt.expected)
+		})
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/js/README.md b/vendor/github.com/tdewolff/parse/js/README.md
new file mode 100644
index 0000000..7b06d7c
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/js/README.md
@@ -0,0 +1,89 @@
+# JS [![GoDoc](http://godoc.org/github.com/tdewolff/parse/js?status.svg)](http://godoc.org/github.com/tdewolff/parse/js) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/js)](http://gocover.io/github.com/tdewolff/parse/js)
+
+This package is a JS lexer (ECMA-262, edition 6.0) written in [Go][1]. It follows the specification at [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/). The lexer takes an io.Reader and converts it into tokens until EOF.
+
+## Installation
+Run the following command
+
+	go get github.com/tdewolff/parse/js
+
+or add the following import and run the project with `go get`
+
+	import "github.com/tdewolff/parse/js"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := js.NewLexer(r)
+```
+
+To tokenize until EOF or an error occurs, use:
+``` go
+for {
+	tt, text := l.Next()
+	switch tt {
+	case js.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	// ...
+	}
+}
+```
+
+All tokens (see [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/)):
+``` go
+ErrorToken TokenType = iota // extra token when errors occur
+UnknownToken // extra token when no token can be matched
+WhitespaceToken // space \t \v \f
+LineTerminatorToken // \r \n \r\n
+CommentToken
+IdentifierToken // also: null true false
+PunctuatorToken /* { } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >>
+   >>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^= / /= => */
+NumericToken
+StringToken
+RegexpToken
+TemplateToken
+```
+
+### Quirks
+Because the ECMAScript specification relies on parser state to differentiate between `PunctuatorToken` (which includes the `/` and `/=` symbols) and `RegexpToken`, the lexer (to remain modular) uses different rules. It aims to correctly disambiguate contexts and returns `RegexpToken` or `PunctuatorToken` where appropriate, with only a few exceptions that make little sense at runtime and thus do not occur in real-world code: function literal division (`x = function y(){} / z`) and object literal division (`x = {y:1} / z`).
+
+Another interesting case, introduced by ES2015, is the `yield` operator in generator functions versus `yield` as an identifier in regular functions. This was done for backward compatibility, but it is very hard to disambiguate correctly at the lexer level without essentially implementing the entire parsing spec as a state machine, hurting performance, code readability and maintainability. Instead, `yield` is always assumed to be an operator. In combination with the above, this means that, for example, `yield /x/i` is always lexed as `yield`ing a regular expression, not as the identifier `yield` divided by `x` and then `i`. There is no evidence that this pattern occurs in any popular library.
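+
+As a rough sketch, this quirk can be observed as follows; the printed result assumes that `yield /x/i` indeed lexes as a single regexp token, per the rule just described:
+``` go
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tdewolff/parse/js"
+)
+
+func main() {
+	l := js.NewLexer(bytes.NewBufferString("yield /x/i"))
+	for {
+		tt, text := l.Next()
+		if tt == js.ErrorToken {
+			break // l.Err() is io.EOF once the input is exhausted
+		}
+		if tt == js.RegexpToken {
+			// `yield` is treated as an operator, so /x/i should arrive as one regexp token.
+			fmt.Println("Regexp:", string(text))
+		}
+	}
+}
+```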
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/js"
+)
+
+// Tokenize JS from stdin.
+func main() {
+	l := js.NewLexer(os.Stdin)
+	for {
+		tt, text := l.Next()
+		switch tt {
+		case js.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error:", l.Err())
+			}
+			return
+		case js.IdentifierToken:
+			fmt.Println("Identifier", string(text))
+		case js.NumericToken:
+			fmt.Println("Numeric", string(text))
+		// ...
+		}
+	}
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/js/hash.go b/vendor/github.com/tdewolff/parse/js/hash.go
new file mode 100644
index 0000000..3de86b2
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/js/hash.go
@@ -0,0 +1,156 @@
+package js
+
+// generated by hasher -file hash.go -type Hash; DO NOT EDIT, except for adding more constants to the list and rerun go generate
+
+// uses github.com/tdewolff/hasher
+//go:generate hasher -type=Hash -file=hash.go
+
+// Hash defines perfect hashes for a predefined list of strings
+type Hash uint32
+
+// Unique hash definitions to be used instead of strings
+const (
+	Break      Hash = 0x5    // break
+	Case       Hash = 0x3404 // case
+	Catch      Hash = 0xba05 // catch
+	Class      Hash = 0x505  // class
+	Const      Hash = 0x2c05 // const
+	Continue   Hash = 0x3e08 // continue
+	Debugger   Hash = 0x8408 // debugger
+	Default    Hash = 0xab07 // default
+	Delete     Hash = 0xcd06 // delete
+	Do         Hash = 0x4c02 // do
+	Else       Hash = 0x3704 // else
+	Enum       Hash = 0x3a04 // enum
+	Export     Hash = 0x1806 // export
+	Extends    Hash = 0x4507 // extends
+	False      Hash = 0x5a05 // false
+	Finally    Hash = 0x7a07 // finally
+	For        Hash = 0xc403 // for
+	Function   Hash = 0x4e08 // function
+	If         Hash = 0x5902 // if
+	Implements Hash = 0x5f0a // implements
+	Import     Hash = 0x6906 // import
+	In         Hash = 0x4202 // in
+	Instanceof Hash = 0x710a // instanceof
+	Interface  Hash = 0x8c09 // interface
+	Let        Hash = 0xcf03 // let
+	New        Hash = 0x1203 // new
+	Null       Hash = 0x5504 // null
+	Package    Hash = 0x9507 // package
+	Private    Hash = 0x9c07 // private
+	Protected  Hash = 0xa309 // protected
+	Public     Hash = 0xb506 // public
+	Return     Hash = 0xd06  // return
+	Static     Hash = 0x2f06 // static
+	Super      Hash = 0x905  // super
+	Switch     Hash = 0x2606 // switch
+	This       Hash = 0x2304 // this
+	Throw      Hash = 0x1d05 // throw
+	True       Hash = 0xb104 // true
+	Try        Hash = 0x6e03 // try
+	Typeof     Hash = 0xbf06 // typeof
+	Var        Hash = 0xc703 // var
+	Void       Hash = 0xca04 // void
+	While      Hash = 0x1405 // while
+	With       Hash = 0x2104 // with
+	Yield      Hash = 0x8005 // yield
+)
+
+// String returns the hash's name.
+func (i Hash) String() string {
+	start := uint32(i >> 8)
+	n := uint32(i & 0xff)
+	if start+n > uint32(len(_Hash_text)) {
+		return ""
+	}
+	return _Hash_text[start : start+n]
+}
+
+// ToHash returns the hash whose name is s.
It returns zero if there is no +// such hash. It is case sensitive. +func ToHash(s []byte) Hash { + if len(s) == 0 || len(s) > _Hash_maxLen { + return 0 + } + h := uint32(_Hash_hash0) + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + goto NEXT + } + } + return i + } +NEXT: + if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + return 0 + } + } + return i + } + return 0 +} + +const _Hash_hash0 = 0x9acb0442 +const _Hash_maxLen = 10 +const _Hash_text = "breakclassupereturnewhilexporthrowithiswitchconstaticaselsen" + + "umcontinuextendsdofunctionullifalseimplementsimportryinstanc" + + "eofinallyieldebuggerinterfacepackageprivateprotectedefaultru" + + "epublicatchtypeoforvarvoidelete" + +var _Hash_table = [1 << 6]Hash{ + 0x0: 0x2f06, // static + 0x1: 0x9c07, // private + 0x3: 0xb104, // true + 0x6: 0x5a05, // false + 0x7: 0x4c02, // do + 0x9: 0x2c05, // const + 0xa: 0x2606, // switch + 0xb: 0x6e03, // try + 0xc: 0x1203, // new + 0xd: 0x4202, // in + 0xf: 0x8005, // yield + 0x10: 0x5f0a, // implements + 0x11: 0xc403, // for + 0x12: 0x505, // class + 0x13: 0x3a04, // enum + 0x16: 0xc703, // var + 0x17: 0x5902, // if + 0x19: 0xcf03, // let + 0x1a: 0x9507, // package + 0x1b: 0xca04, // void + 0x1c: 0xcd06, // delete + 0x1f: 0x5504, // null + 0x20: 0x1806, // export + 0x21: 0xd06, // return + 0x23: 0x4507, // extends + 0x25: 0x2304, // this + 0x26: 0x905, // super + 0x27: 0x1405, // while + 0x29: 0x5, // break + 0x2b: 0x3e08, // continue + 0x2e: 0x3404, // case + 0x2f: 0xab07, // default + 0x31: 0x8408, // debugger + 0x32: 0x1d05, // throw + 0x33: 0xbf06, // typeof + 0x34: 0x2104, // with + 0x35: 0xba05, // catch + 0x36: 0x4e08, // function + 0x37: 0x710a, // instanceof + 0x38: 0xa309, // protected + 0x39: 0x8c09, // interface + 0x3b: 0xb506, // public + 0x3c: 0x3704, // else + 0x3d: 0x7a07, // finally + 0x3f: 0x6906, // import +} diff --git a/vendor/github.com/tdewolff/parse/js/hash_test.go b/vendor/github.com/tdewolff/parse/js/hash_test.go new file mode 100644 index 0000000..fa6a213 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/js/hash_test.go @@ -0,0 +1,18 @@ +package js // import "github.com/tdewolff/parse/js" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestHashTable(t *testing.T) { + test.T(t, ToHash([]byte("break")), Break, "'break' must resolve to hash.Break") + test.T(t, ToHash([]byte("var")), Var, "'var' must resolve to hash.Var") + test.T(t, Break.String(), "break") + test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero") + test.T(t, Hash(0xffffff).String(), "") + test.T(t, ToHash([]byte("breaks")), Hash(0), "'breaks' must resolve to zero") + test.T(t, ToHash([]byte("sdf")), Hash(0), "'sdf' must resolve to zero") + test.T(t, ToHash([]byte("uio")), Hash(0), "'uio' must resolve to zero") +} diff --git a/vendor/github.com/tdewolff/parse/js/lex.go b/vendor/github.com/tdewolff/parse/js/lex.go new file mode 100644 index 0000000..96ffcd8 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/js/lex.go @@ -0,0 +1,650 @@ +// Package js is an ECMAScript5.1 lexer following the specifications at http://www.ecma-international.org/ecma-262/5.1/. 
+package js // import "github.com/tdewolff/parse/js"
+
+import (
+	"io"
+	"strconv"
+	"unicode"
+
+	"github.com/tdewolff/parse/buffer"
+)
+
+var identifierStart = []*unicode.RangeTable{unicode.Lu, unicode.Ll, unicode.Lt, unicode.Lm, unicode.Lo, unicode.Nl, unicode.Other_ID_Start}
+var identifierContinue = []*unicode.RangeTable{unicode.Lu, unicode.Ll, unicode.Lt, unicode.Lm, unicode.Lo, unicode.Nl, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue}
+
+////////////////////////////////////////////////////////////////
+
+// TokenType determines the type of token, e.g. a number or a semicolon.
+type TokenType uint32
+
+// TokenType values.
+const (
+	ErrorToken TokenType = iota // extra token when errors occur
+	UnknownToken // extra token when no token can be matched
+	WhitespaceToken // space \t \v \f
+	LineTerminatorToken // \r \n \r\n
+	CommentToken
+	IdentifierToken
+	PunctuatorToken /* { } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >>
+	>>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^= / /= => */
+	NumericToken
+	StringToken
+	RegexpToken
+	TemplateToken
+)
+
+// TokenState determines the state in which the next token should be read.
+type TokenState uint32
+
+// TokenState values.
+const (
+	ExprState TokenState = iota
+	StmtParensState
+	SubscriptState
+	PropNameState
+)
+
+// ParsingContext determines the context in which the following token should be parsed.
+// This affects parsing regular expressions and template literals.
+type ParsingContext uint32
+
+// ParsingContext values.
+const (
+	GlobalContext ParsingContext = iota
+	StmtParensContext
+	ExprParensContext
+	BracesContext
+	TemplateContext
+)
+
+// String returns the string representation of a TokenType.
+func (tt TokenType) String() string {
+	switch tt {
+	case ErrorToken:
+		return "Error"
+	case UnknownToken:
+		return "Unknown"
+	case WhitespaceToken:
+		return "Whitespace"
+	case LineTerminatorToken:
+		return "LineTerminator"
+	case CommentToken:
+		return "Comment"
+	case IdentifierToken:
+		return "Identifier"
+	case PunctuatorToken:
+		return "Punctuator"
+	case NumericToken:
+		return "Numeric"
+	case StringToken:
+		return "String"
+	case RegexpToken:
+		return "Regexp"
+	case TemplateToken:
+		return "Template"
+	}
+	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
+}
+
+////////////////////////////////////////////////////////////////
+
+// Lexer is the state for the lexer.
+type Lexer struct {
+	r         *buffer.Lexer
+	stack     []ParsingContext
+	state     TokenState
+	emptyLine bool
+}
+
+// NewLexer returns a new Lexer for a given io.Reader.
+func NewLexer(r io.Reader) *Lexer {
+	return &Lexer{
+		r:         buffer.NewLexer(r),
+		stack:     make([]ParsingContext, 0, 16),
+		state:     ExprState,
+		emptyLine: true,
+	}
+}
+
+func (l *Lexer) enterContext(context ParsingContext) {
+	l.stack = append(l.stack, context)
+}
+
+func (l *Lexer) leaveContext() ParsingContext {
+	ctx := GlobalContext
+	if last := len(l.stack) - 1; last >= 0 {
+		ctx, l.stack = l.stack[last], l.stack[:last]
+	}
+	return ctx
+}
+
+// Err returns the error encountered during lexing; this is often io.EOF, but other errors can be returned as well.
+func (l *Lexer) Err() error {
+	return l.r.Err()
+}
+
+// Restore restores the NULL byte at the end of the buffer.
+func (l *Lexer) Restore() {
+	l.r.Restore()
+}
+
+// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
+func (l *Lexer) Next() (TokenType, []byte) {
+	tt := UnknownToken
+	c := l.r.Peek(0)
+	switch c {
+	case '(':
+		if l.state == StmtParensState {
+			l.enterContext(StmtParensContext)
+		} else {
+			l.enterContext(ExprParensContext)
+		}
+		l.state = ExprState
+		l.r.Move(1)
+		tt = PunctuatorToken
+	case ')':
+		if l.leaveContext() == StmtParensContext {
+			l.state = ExprState
+		} else {
+			l.state = SubscriptState
+		}
+		l.r.Move(1)
+		tt = PunctuatorToken
+	case '{':
+		l.enterContext(BracesContext)
+		l.state = ExprState
+		l.r.Move(1)
+		tt = PunctuatorToken
+	case '}':
+		if l.leaveContext() == TemplateContext && l.consumeTemplateToken() {
+			tt = TemplateToken
+		} else {
+			// will work incorrectly for objects or functions divided by something,
+			// but that's an extremely rare case
+			l.state = ExprState
+			l.r.Move(1)
+			tt = PunctuatorToken
+		}
+	case ']':
+		l.state = SubscriptState
+		l.r.Move(1)
+		tt = PunctuatorToken
+	case '[', ';', ',', '~', '?', ':':
+		l.state = ExprState
+		l.r.Move(1)
+		tt = PunctuatorToken
+	case '<', '>', '=', '!', '+', '-', '*', '%', '&', '|', '^':
+		if (c == '<' || (l.emptyLine && c == '-')) && l.consumeCommentToken() {
+			return CommentToken, l.r.Shift()
+		} else if l.consumeLongPunctuatorToken() {
+			l.state = ExprState
+			tt = PunctuatorToken
+		}
+	case '/':
+		if l.consumeCommentToken() {
+			return CommentToken, l.r.Shift()
+		} else if l.state == ExprState && l.consumeRegexpToken() {
+			l.state = SubscriptState
+			tt = RegexpToken
+		} else if l.consumeLongPunctuatorToken() {
+			l.state = ExprState
+			tt = PunctuatorToken
+		}
+	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
+		if l.consumeNumericToken() {
+			tt = NumericToken
+			l.state = SubscriptState
+		} else if c == '.' {
+			l.state = PropNameState
+			l.r.Move(1)
+			tt = PunctuatorToken
+		}
+	case '\'', '"':
+		if l.consumeStringToken() {
+			l.state = SubscriptState
+			tt = StringToken
+		}
+	case ' ', '\t', '\v', '\f':
+		l.r.Move(1)
+		for l.consumeWhitespace() {
+		}
+		return WhitespaceToken, l.r.Shift()
+	case '\n', '\r':
+		l.r.Move(1)
+		for l.consumeLineTerminator() {
+		}
+		tt = LineTerminatorToken
+	case '`':
+		if l.consumeTemplateToken() {
+			tt = TemplateToken
+		}
+	default:
+		if l.consumeIdentifierToken() {
+			tt = IdentifierToken
+			if l.state != PropNameState {
+				switch hash := ToHash(l.r.Lexeme()); hash {
+				case 0, This, False, True, Null:
+					l.state = SubscriptState
+				case If, While, For, With:
+					l.state = StmtParensState
+				default:
+					// This includes keywords that can only be followed by a specified
+					// character (like `switch` or `try`) and not by a regexp; we don't
+					// check for such syntax errors, as we don't attempt to parse the
+					// full JS grammar when streaming
+					l.state = ExprState
+				}
+			} else {
+				l.state = SubscriptState
+			}
+		} else if c >= 0xC0 {
+			if l.consumeWhitespace() {
+				for l.consumeWhitespace() {
+				}
+				return WhitespaceToken, l.r.Shift()
+			} else if l.consumeLineTerminator() {
+				for l.consumeLineTerminator() {
+				}
+				tt = LineTerminatorToken
+			}
+		} else if l.Err() != nil {
+			return ErrorToken, nil
+		}
+	}
+
+	l.emptyLine = tt == LineTerminatorToken
+
+	if tt == UnknownToken {
+		_, n := l.r.PeekRune(0)
+		l.r.Move(n)
+	}
+	return tt, l.r.Shift()
+}
+
+////////////////////////////////////////////////////////////////
+
+/*
+The following functions follow the specifications at http://www.ecma-international.org/ecma-262/5.1/
+*/
+
+func (l *Lexer) consumeWhitespace() bool {
+	c := l.r.Peek(0)
+	if c == ' ' || c == '\t' || c == '\v' || c == '\f' {
+		l.r.Move(1)
+		return true
+	} else if c >= 0xC0 {
+		if r, n :=
l.r.PeekRune(0); r == '\u00A0' || r == '\uFEFF' || unicode.Is(unicode.Zs, r) { + l.r.Move(n) + return true + } + } + return false +} + +func (l *Lexer) consumeLineTerminator() bool { + c := l.r.Peek(0) + if c == '\n' { + l.r.Move(1) + return true + } else if c == '\r' { + if l.r.Peek(1) == '\n' { + l.r.Move(2) + } else { + l.r.Move(1) + } + return true + } else if c >= 0xC0 { + if r, n := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { + l.r.Move(n) + return true + } + } + return false +} + +func (l *Lexer) consumeDigit() bool { + if c := l.r.Peek(0); c >= '0' && c <= '9' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeHexDigit() bool { + if c := l.r.Peek(0); (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeBinaryDigit() bool { + if c := l.r.Peek(0); c == '0' || c == '1' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeOctalDigit() bool { + if c := l.r.Peek(0); c >= '0' && c <= '7' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeUnicodeEscape() bool { + if l.r.Peek(0) != '\\' || l.r.Peek(1) != 'u' { + return false + } + mark := l.r.Pos() + l.r.Move(2) + if c := l.r.Peek(0); c == '{' { + l.r.Move(1) + if l.consumeHexDigit() { + for l.consumeHexDigit() { + } + if c := l.r.Peek(0); c == '}' { + l.r.Move(1) + return true + } + } + l.r.Rewind(mark) + return false + } else if !l.consumeHexDigit() || !l.consumeHexDigit() || !l.consumeHexDigit() || !l.consumeHexDigit() { + l.r.Rewind(mark) + return false + } + return true +} + +func (l *Lexer) consumeSingleLineComment() { + for { + c := l.r.Peek(0) + if c == '\r' || c == '\n' || c == 0 { + break + } else if c >= 0xC0 { + if r, _ := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { + break + } + } + l.r.Move(1) + } +} + +//////////////////////////////////////////////////////////////// + +func (l *Lexer) consumeCommentToken() bool { + c := l.r.Peek(0) + if c == '/' { + c = l.r.Peek(1) + if c == '/' { + // single line + l.r.Move(2) + l.consumeSingleLineComment() + } else if c == '*' { + // multi line + l.r.Move(2) + for { + c := l.r.Peek(0) + if c == '*' && l.r.Peek(1) == '/' { + l.r.Move(2) + return true + } else if c == 0 { + break + } else if l.consumeLineTerminator() { + l.emptyLine = true + } else { + l.r.Move(1) + } + } + } else { + return false + } + } else if c == '<' && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { + // opening HTML-style single line comment + l.r.Move(4) + l.consumeSingleLineComment() + } else if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + // closing HTML-style single line comment + // (only if current line didn't contain any meaningful tokens) + l.r.Move(3) + l.consumeSingleLineComment() + } else { + return false + } + return true +} + +func (l *Lexer) consumeLongPunctuatorToken() bool { + c := l.r.Peek(0) + if c == '!' || c == '=' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^' { + l.r.Move(1) + if l.r.Peek(0) == '=' { + l.r.Move(1) + if (c == '!' 
|| c == '=') && l.r.Peek(0) == '=' { + l.r.Move(1) + } + } else if (c == '+' || c == '-' || c == '&' || c == '|') && l.r.Peek(0) == c { + l.r.Move(1) + } else if c == '=' && l.r.Peek(0) == '>' { + l.r.Move(1) + } + } else { // c == '<' || c == '>' + l.r.Move(1) + if l.r.Peek(0) == c { + l.r.Move(1) + if c == '>' && l.r.Peek(0) == '>' { + l.r.Move(1) + } + } + if l.r.Peek(0) == '=' { + l.r.Move(1) + } + } + return true +} + +func (l *Lexer) consumeIdentifierToken() bool { + c := l.r.Peek(0) + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '$' || c == '_' { + l.r.Move(1) + } else if c >= 0xC0 { + if r, n := l.r.PeekRune(0); unicode.IsOneOf(identifierStart, r) { + l.r.Move(n) + } else { + return false + } + } else if !l.consumeUnicodeEscape() { + return false + } + for { + c := l.r.Peek(0) + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { + l.r.Move(1) + } else if c >= 0xC0 { + if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { + l.r.Move(n) + } else { + break + } + } else { + break + } + } + return true +} + +func (l *Lexer) consumeNumericToken() bool { + // assume to be on 0 1 2 3 4 5 6 7 8 9 . + mark := l.r.Pos() + c := l.r.Peek(0) + if c == '0' { + l.r.Move(1) + if l.r.Peek(0) == 'x' || l.r.Peek(0) == 'X' { + l.r.Move(1) + if l.consumeHexDigit() { + for l.consumeHexDigit() { + } + } else { + l.r.Move(-1) // return just the zero + } + return true + } else if l.r.Peek(0) == 'b' || l.r.Peek(0) == 'B' { + l.r.Move(1) + if l.consumeBinaryDigit() { + for l.consumeBinaryDigit() { + } + } else { + l.r.Move(-1) // return just the zero + } + return true + } else if l.r.Peek(0) == 'o' || l.r.Peek(0) == 'O' { + l.r.Move(1) + if l.consumeOctalDigit() { + for l.consumeOctalDigit() { + } + } else { + l.r.Move(-1) // return just the zero + } + return true + } + } else if c != '.' { + for l.consumeDigit() { + } + } + if l.r.Peek(0) == '.' { + l.r.Move(1) + if l.consumeDigit() { + for l.consumeDigit() { + } + } else if c != '.' { + // . 
could belong to the next token + l.r.Move(-1) + return true + } else { + l.r.Rewind(mark) + return false + } + } + mark = l.r.Pos() + c = l.r.Peek(0) + if c == 'e' || c == 'E' { + l.r.Move(1) + c = l.r.Peek(0) + if c == '+' || c == '-' { + l.r.Move(1) + } + if !l.consumeDigit() { + // e could belong to the next token + l.r.Rewind(mark) + return true + } + for l.consumeDigit() { + } + } + return true +} + +func (l *Lexer) consumeStringToken() bool { + // assume to be on ' or " + mark := l.r.Pos() + delim := l.r.Peek(0) + l.r.Move(1) + for { + c := l.r.Peek(0) + if c == delim { + l.r.Move(1) + break + } else if c == '\\' { + l.r.Move(1) + if !l.consumeLineTerminator() { + if c := l.r.Peek(0); c == delim || c == '\\' { + l.r.Move(1) + } + } + continue + } else if c == '\n' || c == '\r' { + l.r.Rewind(mark) + return false + } else if c >= 0xC0 { + if r, _ := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { + l.r.Rewind(mark) + return false + } + } else if c == 0 { + break + } + l.r.Move(1) + } + return true +} + +func (l *Lexer) consumeRegexpToken() bool { + // assume to be on / and not /* + mark := l.r.Pos() + l.r.Move(1) + inClass := false + for { + c := l.r.Peek(0) + if !inClass && c == '/' { + l.r.Move(1) + break + } else if c == '[' { + inClass = true + } else if c == ']' { + inClass = false + } else if c == '\\' { + l.r.Move(1) + if l.consumeLineTerminator() { + l.r.Rewind(mark) + return false + } + } else if l.consumeLineTerminator() { + l.r.Rewind(mark) + return false + } else if c == 0 { + return true + } + l.r.Move(1) + } + // flags + for { + c := l.r.Peek(0) + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { + l.r.Move(1) + } else if c >= 0xC0 { + if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { + l.r.Move(n) + } else { + break + } + } else { + break + } + } + return true +} + +func (l *Lexer) consumeTemplateToken() bool { + // assume to be on ` or } when already within template + mark := l.r.Pos() + l.r.Move(1) + for { + c := l.r.Peek(0) + if c == '`' { + l.state = SubscriptState + l.r.Move(1) + return true + } else if c == '$' && l.r.Peek(1) == '{' { + l.enterContext(TemplateContext) + l.state = ExprState + l.r.Move(2) + return true + } else if c == 0 { + l.r.Rewind(mark) + return false + } + l.r.Move(1) + } +} diff --git a/vendor/github.com/tdewolff/parse/js/lex_test.go b/vendor/github.com/tdewolff/parse/js/lex_test.go new file mode 100644 index 0000000..b379321 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/js/lex_test.go @@ -0,0 +1,152 @@ +package js // import "github.com/tdewolff/parse/js" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t *testing.T) { + var tokenTests = []struct { + js string + expected []TokenType + }{ + {" \t\v\f\u00A0\uFEFF\u2000", TTs{}}, // WhitespaceToken + {"\n\r\r\n\u2028\u2029", TTs{LineTerminatorToken}}, + {"5.2 .04 0x0F 5e99", TTs{NumericToken, NumericToken, NumericToken, NumericToken}}, + {"a = 'string'", TTs{IdentifierToken, PunctuatorToken, StringToken}}, + {"/*comment*/ //comment", TTs{CommentToken, CommentToken}}, + {"{ } ( ) [ ]", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {". 
; , < > <=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {">= == != === !==", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"+ - * % ++ --", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"<< >> >>> & | ^", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"! ~ && || ? :", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"= += -= *= %= <<=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {">>= >>>= &= |= ^= =>", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"a = /.*/g;", TTs{IdentifierToken, PunctuatorToken, RegexpToken, PunctuatorToken}}, + + {"/*co\nm\u2028m/*ent*/ //co//mment\u2029//comment", TTs{CommentToken, CommentToken, LineTerminatorToken, CommentToken}}, + {"<!-", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"1<!--2\n", TTs{NumericToken, CommentToken, LineTerminatorToken}}, + {"x=y-->10\n", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken, LineTerminatorToken}}, + {" /*comment*/ -->nothing\n", TTs{CommentToken, CommentToken, LineTerminatorToken}}, + {"1 /*comment\nmultiline*/ -->nothing\n", TTs{NumericToken, CommentToken, CommentToken, LineTerminatorToken}}, + {"$ _\u200C \\u2000 \u200C", TTs{IdentifierToken, IdentifierToken, IdentifierToken, UnknownToken}}, + {">>>=>>>>=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken}}, + {"1/", TTs{NumericToken, PunctuatorToken}}, + {"1/=", TTs{NumericToken, PunctuatorToken}}, + {"010xF", TTs{NumericToken, NumericToken, IdentifierToken}}, + {"50e+-0", TTs{NumericToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken}}, + {"'str\\i\\'ng'", TTs{StringToken}}, + {"'str\\\\'abc", TTs{StringToken, IdentifierToken}}, + {"'str\\\ni\\\\u00A0ng'", TTs{StringToken}}, + {"a = /[a-z/]/g", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, + {"a=/=/g1", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, + {"a = /'\\\\/\n", TTs{IdentifierToken, PunctuatorToken, RegexpToken, LineTerminatorToken}}, + {"a=/\\//g1", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, + {"new RegExp(a + /\\d{1,2}/.source)", TTs{IdentifierToken, IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, RegexpToken, PunctuatorToken, IdentifierToken, PunctuatorToken}}, + + {"0b0101 0o0707 0b17", TTs{NumericToken, NumericToken, NumericToken, NumericToken}}, + {"`template`", TTs{TemplateToken}}, + {"`a${x+y}b`", TTs{TemplateToken, IdentifierToken, PunctuatorToken, IdentifierToken, TemplateToken}}, + {"`temp\nlate`", TTs{TemplateToken}}, + {"`outer${{x: 10}}bar${ raw`nested${2}endnest` }end`", TTs{TemplateToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, TemplateToken, IdentifierToken, TemplateToken, NumericToken, TemplateToken, TemplateToken}}, + + // early endings + {"'string", TTs{StringToken}}, + {"'\n '\u2028", TTs{UnknownToken, LineTerminatorToken, UnknownToken, LineTerminatorToken}}, + {"'str\\\U00100000ing\\0'", TTs{StringToken}}, + {"'strin\\00g'", TTs{StringToken}}, + {"/*comment", TTs{CommentToken}}, + {"a=/regexp", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, + {"\\u002", TTs{UnknownToken, 
IdentifierToken}}, + + // coverage + {"Ø a〉", TTs{IdentifierToken, IdentifierToken, UnknownToken}}, + {"0xg 0.f", TTs{NumericToken, IdentifierToken, NumericToken, PunctuatorToken, IdentifierToken}}, + {"0bg 0og", TTs{NumericToken, IdentifierToken, NumericToken, IdentifierToken}}, + {"\u00A0\uFEFF\u2000", TTs{}}, + {"\u2028\u2029", TTs{LineTerminatorToken}}, + {"\\u0029ident", TTs{IdentifierToken}}, + {"\\u{0029FEF}ident", TTs{IdentifierToken}}, + {"\\u{}", TTs{UnknownToken, IdentifierToken, PunctuatorToken, PunctuatorToken}}, + {"\\ugident", TTs{UnknownToken, IdentifierToken}}, + {"'str\u2028ing'", TTs{UnknownToken, IdentifierToken, LineTerminatorToken, IdentifierToken, StringToken}}, + {"a=/\\\n", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, UnknownToken, LineTerminatorToken}}, + {"a=/x/\u200C\u3009", TTs{IdentifierToken, PunctuatorToken, RegexpToken, UnknownToken}}, + {"a=/x\n", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, IdentifierToken, LineTerminatorToken}}, + + {"return /abc/;", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, + {"yield /abc/;", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, + {"a/b/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, IdentifierToken}}, + {"{}/1/g", TTs{PunctuatorToken, PunctuatorToken, RegexpToken}}, + {"i(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, + {"if(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, + {"a.if(0)/1/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, + {"while(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, + {"for(;;)/1/g", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, RegexpToken}}, + {"with(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, + {"this/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, + {"case /1/g:", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, + {"function f(){}/1/g", TTs{IdentifierToken, IdentifierToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, RegexpToken}}, + {"this.return/1/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, + {"(a+b)/1/g", TTs{PunctuatorToken, IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, + + // go fuzz + {"`", TTs{UnknownToken}}, + } + + for _, tt := range tokenTests { + t.Run(tt.js, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.js)) + i := 0 + j := 0 + for { + token, _ := l.Next() + j++ + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == WhitespaceToken { + continue + } + if i < len(tt.expected) { + if token != tt.expected[i] { + test.String(t, token.String(), tt.expected[i].String(), "token types must match") + break + } + } else { + test.Fail(t, "index", i, "must not exceed expected token types size", len(tt.expected)) + break + } + i++ + } + }) + } + + test.T(t, WhitespaceToken.String(), "Whitespace") + test.T(t, TokenType(100).String(), "Invalid(100)") +} + 
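+
+// Editorial sketch, not part of upstream: template literals switch the lexer
+// into TemplateContext, so `${` and `}` delimit embedded expressions.
+func ExampleNewLexer_template() {
+	l := NewLexer(bytes.NewBufferString("`a${x}b`"))
+	for {
+		tt, data := l.Next()
+		if tt == ErrorToken {
+			break
+		}
+		fmt.Println(tt, string(data))
+	}
+	// Output:
+	// Template `a${
+	// Identifier x
+	// Template }b`
+}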
+
+////////////////////////////////////////////////////////////////
+
+func ExampleNewLexer() {
+	l := NewLexer(bytes.NewBufferString("var x = 'lorem ipsum';"))
+	out := ""
+	for {
+		tt, data := l.Next()
+		if tt == ErrorToken {
+			break
+		}
+		out += string(data)
+	}
+	fmt.Println(out)
+	// Output: var x = 'lorem ipsum';
+}
diff --git a/vendor/github.com/tdewolff/parse/json/README.md b/vendor/github.com/tdewolff/parse/json/README.md
new file mode 100644
index 0000000..7621ca9
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/json/README.md
@@ -0,0 +1,81 @@
+# JSON [![GoDoc](http://godoc.org/github.com/tdewolff/parse/json?status.svg)](http://godoc.org/github.com/tdewolff/parse/json) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/json)](http://gocover.io/github.com/tdewolff/parse/json)
+
+This package is a JSON parser (ECMA-404) written in [Go][1]. It follows the specification at [JSON](http://json.org/). The parser takes an io.Reader and converts it into tokens until EOF.
+
+## Installation
+Run the following command
+
+	go get github.com/tdewolff/parse/json
+
+or add the following import and run the project with `go get`
+
+	import "github.com/tdewolff/parse/json"
+
+## Parser
+### Usage
+The following initializes a new Parser with io.Reader `r`:
+``` go
+p := json.NewParser(r)
+```
+
+To tokenize until EOF or an error, use:
+``` go
+for {
+	gt, text := p.Next()
+	switch gt {
+	case json.ErrorGrammar:
+		// error or EOF set in p.Err()
+		return
+	// ...
+	}
+}
+```
+
+All grammars:
+``` go
+ErrorGrammar GrammarType = iota // extra grammar when errors occur
+WhitespaceGrammar // space \t \r \n
+LiteralGrammar // null true false
+NumberGrammar
+StringGrammar
+StartObjectGrammar // {
+EndObjectGrammar // }
+StartArrayGrammar // [
+EndArrayGrammar // ]
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/json"
+)
+
+// Tokenize JSON from stdin.
+func main() {
+	p := json.NewParser(os.Stdin)
+	for {
+		gt, text := p.Next()
+		switch gt {
+		case json.ErrorGrammar:
+			if p.Err() != io.EOF {
+				fmt.Println("Error:", p.Err())
+			}
+			return
+		case json.LiteralGrammar:
+			fmt.Println("Literal", string(text))
+		case json.NumberGrammar:
+			fmt.Println("Number", string(text))
+		// ...
+		}
+	}
+}
```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/json/parse.go b/vendor/github.com/tdewolff/parse/json/parse.go
new file mode 100644
index 0000000..ae133f2
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/json/parse.go
@@ -0,0 +1,307 @@
+// Package json is a JSON parser following the specifications at http://json.org/.
+package json // import "github.com/tdewolff/parse/json"
+
+import (
+	"io"
+	"strconv"
+
+	"github.com/tdewolff/parse"
+	"github.com/tdewolff/parse/buffer"
+)
+
+// GrammarType determines the type of grammar.
+type GrammarType uint32
+
+// GrammarType values.
+const (
+	ErrorGrammar GrammarType = iota // extra grammar when errors occur
+	WhitespaceGrammar
+	LiteralGrammar
+	NumberGrammar
+	StringGrammar
+	StartObjectGrammar // {
+	EndObjectGrammar // }
+	StartArrayGrammar // [
+	EndArrayGrammar // ]
+)
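+
+// Editorial note, not part of upstream: a sketch of the grammar sequence the
+// parser emits for a small document, whitespace omitted:
+//
+//	{"a": [1, true]}
+//	=> StartObject String("a") StartArray Number(1) Literal(true) EndArray EndObject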
+// String returns the string representation of a GrammarType.
+func (gt GrammarType) String() string {
+	switch gt {
+	case ErrorGrammar:
+		return "Error"
+	case WhitespaceGrammar:
+		return "Whitespace"
+	case LiteralGrammar:
+		return "Literal"
+	case NumberGrammar:
+		return "Number"
+	case StringGrammar:
+		return "String"
+	case StartObjectGrammar:
+		return "StartObject"
+	case EndObjectGrammar:
+		return "EndObject"
+	case StartArrayGrammar:
+		return "StartArray"
+	case EndArrayGrammar:
+		return "EndArray"
+	}
+	return "Invalid(" + strconv.Itoa(int(gt)) + ")"
+}
+
+////////////////////////////////////////////////////////////////
+
+// State determines the current state the parser is in.
+type State uint32
+
+// State values.
+const (
+	ValueState State = iota // default state, expecting any value
+	ObjectKeyState
+	ObjectValueState
+	ArrayState
+)
+
+// String returns the string representation of a State.
+func (state State) String() string {
+	switch state {
+	case ValueState:
+		return "Value"
+	case ObjectKeyState:
+		return "ObjectKey"
+	case ObjectValueState:
+		return "ObjectValue"
+	case ArrayState:
+		return "Array"
+	}
+	return "Invalid(" + strconv.Itoa(int(state)) + ")"
+}
+
+////////////////////////////////////////////////////////////////
+
+// Parser holds the parser state.
+type Parser struct {
+	r     *buffer.Lexer
+	state []State
+	err   error
+
+	needComma bool
+}
+
+// NewParser returns a new Parser for a given io.Reader.
+func NewParser(r io.Reader) *Parser {
+	return &Parser{
+		r:     buffer.NewLexer(r),
+		state: []State{ValueState},
+	}
+}
+
+// Err returns the error encountered during tokenization; this is often io.EOF, but other errors can be returned as well.
+func (p *Parser) Err() error {
+	if err := p.r.Err(); err != nil {
+		return err
+	}
+	return p.err
+}
+
+// Restore restores the NULL byte at the end of the buffer.
+func (p *Parser) Restore() {
+	p.r.Restore()
+}
+
+// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message.
+func (p *Parser) Next() (GrammarType, []byte) {
+	p.moveWhitespace()
+	c := p.r.Peek(0)
+	state := p.state[len(p.state)-1]
+	if c == ',' {
+		if state != ArrayState && state != ObjectKeyState {
+			p.err = parse.NewErrorLexer("unexpected comma character outside an array or object", p.r)
+			return ErrorGrammar, nil
+		}
+		p.r.Move(1)
+		p.moveWhitespace()
+		p.needComma = false
+		c = p.r.Peek(0)
+	}
+	p.r.Skip()
+
+	if p.needComma && c != '}' && c != ']' && c != 0 {
+		p.err = parse.NewErrorLexer("expected comma character or an array or object ending", p.r)
+		return ErrorGrammar, nil
+	} else if c == '{' {
+		p.state = append(p.state, ObjectKeyState)
+		p.r.Move(1)
+		return StartObjectGrammar, p.r.Shift()
+	} else if c == '}' {
+		if state != ObjectKeyState {
+			p.err = parse.NewErrorLexer("unexpected right brace character", p.r)
+			return ErrorGrammar, nil
+		}
+		p.needComma = true
+		p.state = p.state[:len(p.state)-1]
+		if p.state[len(p.state)-1] == ObjectValueState {
+			p.state[len(p.state)-1] = ObjectKeyState
+		}
+		p.r.Move(1)
+		return EndObjectGrammar, p.r.Shift()
+	} else if c == '[' {
+		p.state = append(p.state, ArrayState)
+		p.r.Move(1)
+		return StartArrayGrammar, p.r.Shift()
+	} else if c == ']' {
+		p.needComma = true
+		if state != ArrayState {
+			p.err = parse.NewErrorLexer("unexpected right bracket character", p.r)
+			return ErrorGrammar, nil
+		}
+		p.state = p.state[:len(p.state)-1]
+		if p.state[len(p.state)-1] == ObjectValueState {
+			p.state[len(p.state)-1] = ObjectKeyState
+		}
+		p.r.Move(1)
+		return EndArrayGrammar, p.r.Shift()
+	} else if state == ObjectKeyState {
+		if c != '"' || !p.consumeStringToken() {
+			p.err = parse.NewErrorLexer("expected object key to be a quoted string", p.r)
+			return ErrorGrammar, nil
+		}
+		n := p.r.Pos()
+		p.moveWhitespace()
+		if c := p.r.Peek(0); c != ':' {
+			p.err = parse.NewErrorLexer("expected colon character after object key", p.r)
+			return ErrorGrammar, nil
+		}
+		p.r.Move(1)
+		p.state[len(p.state)-1] = ObjectValueState
+		return StringGrammar, p.r.Shift()[:n]
+	} else {
+		p.needComma = true
+		if state == ObjectValueState {
+			p.state[len(p.state)-1] = ObjectKeyState
+		}
+		if c == '"' && p.consumeStringToken() {
+			return StringGrammar, p.r.Shift()
+		} else if p.consumeNumberToken() {
+			return NumberGrammar, p.r.Shift()
+		} else if p.consumeLiteralToken() {
+			return LiteralGrammar, p.r.Shift()
+		}
+	}
+	return ErrorGrammar, nil
+}
+
+// State returns the state the parser is currently in (i.e. which token is expected).
+func (p *Parser) State() State { + return p.state[len(p.state)-1] +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the specifications at http://json.org/ +*/ + +func (p *Parser) moveWhitespace() { + for { + if c := p.r.Peek(0); c != ' ' && c != '\n' && c != '\r' && c != '\t' { + break + } + p.r.Move(1) + } +} + +func (p *Parser) consumeLiteralToken() bool { + c := p.r.Peek(0) + if c == 't' && p.r.Peek(1) == 'r' && p.r.Peek(2) == 'u' && p.r.Peek(3) == 'e' { + p.r.Move(4) + return true + } else if c == 'f' && p.r.Peek(1) == 'a' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 's' && p.r.Peek(4) == 'e' { + p.r.Move(5) + return true + } else if c == 'n' && p.r.Peek(1) == 'u' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 'l' { + p.r.Move(4) + return true + } + return false +} + +func (p *Parser) consumeNumberToken() bool { + mark := p.r.Pos() + if p.r.Peek(0) == '-' { + p.r.Move(1) + } + c := p.r.Peek(0) + if c >= '1' && c <= '9' { + p.r.Move(1) + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } else if c != '0' { + p.r.Rewind(mark) + return false + } else { + p.r.Move(1) // 0 + } + if c := p.r.Peek(0); c == '.' { + p.r.Move(1) + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Move(-1) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + mark = p.r.Pos() + if c := p.r.Peek(0); c == 'e' || c == 'E' { + p.r.Move(1) + if c := p.r.Peek(0); c == '+' || c == '-' { + p.r.Move(1) + } + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Rewind(mark) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + return true +} + +func (p *Parser) consumeStringToken() bool { + // assume to be on " + p.r.Move(1) + for { + c := p.r.Peek(0) + if c == '"' { + escaped := false + for i := p.r.Pos() - 1; i >= 0; i-- { + if p.r.Lexeme()[i] == '\\' { + escaped = !escaped + } else { + break + } + } + if !escaped { + p.r.Move(1) + break + } + } else if c == 0 { + return false + } + p.r.Move(1) + } + return true +} diff --git a/vendor/github.com/tdewolff/parse/json/parse_test.go b/vendor/github.com/tdewolff/parse/json/parse_test.go new file mode 100644 index 0000000..6ea28d1 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/json/parse_test.go @@ -0,0 +1,159 @@ +package json // import "github.com/tdewolff/parse/json" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type GTs []GrammarType + +func TestGrammars(t *testing.T) { + var grammarTests = []struct { + json string + expected []GrammarType + }{ + {" \t\n\r", GTs{}}, // WhitespaceGrammar + {"null", GTs{LiteralGrammar}}, + {"[]", GTs{StartArrayGrammar, EndArrayGrammar}}, + {"15.2", GTs{NumberGrammar}}, + {"0.4", GTs{NumberGrammar}}, + {"5e9", GTs{NumberGrammar}}, + {"-4E-3", GTs{NumberGrammar}}, + {"true", GTs{LiteralGrammar}}, + {"false", GTs{LiteralGrammar}}, + {"null", GTs{LiteralGrammar}}, + {`""`, GTs{StringGrammar}}, + {`"abc"`, GTs{StringGrammar}}, + {`"\""`, GTs{StringGrammar}}, + {`"\\"`, GTs{StringGrammar}}, + {"{}", GTs{StartObjectGrammar, EndObjectGrammar}}, + {`{"a": "b", "c": "d"}`, GTs{StartObjectGrammar, StringGrammar, StringGrammar, StringGrammar, StringGrammar, EndObjectGrammar}}, + {`{"a": [1, 2], "b": {"c": 3}}`, GTs{StartObjectGrammar, StringGrammar, StartArrayGrammar, NumberGrammar, NumberGrammar, EndArrayGrammar, StringGrammar, StartObjectGrammar, StringGrammar, NumberGrammar, EndObjectGrammar, 
EndObjectGrammar}}, + {"[null,]", GTs{StartArrayGrammar, LiteralGrammar, EndArrayGrammar}}, + // {"[\"x\\\x00y\", 0]", GTs{StartArrayGrammar, StringGrammar, NumberGrammar, EndArrayGrammar}}, + } + for _, tt := range grammarTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + i := 0 + for { + grammar, _ := p.Next() + if grammar == ErrorGrammar { + test.T(t, p.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if grammar == WhitespaceGrammar { + continue + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected grammar types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, grammar, tt.expected[i], "grammar types must match") + } + i++ + } + }) + } + + test.T(t, WhitespaceGrammar.String(), "Whitespace") + test.T(t, GrammarType(100).String(), "Invalid(100)") + test.T(t, ValueState.String(), "Value") + test.T(t, ObjectKeyState.String(), "ObjectKey") + test.T(t, ObjectValueState.String(), "ObjectValue") + test.T(t, ArrayState.String(), "Array") + test.T(t, State(100).String(), "Invalid(100)") +} + +func TestGrammarsError(t *testing.T) { + var grammarErrorTests = []struct { + json string + col int + }{ + {"true, false", 5}, + {"[true false]", 7}, + {"]", 1}, + {"}", 1}, + {"{0: 1}", 2}, + {"{\"a\" 1}", 6}, + {"1.", 2}, + {"1e+", 2}, + {`{"":"`, 0}, + {"\"a\\", 0}, + } + for _, tt := range grammarErrorTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + for { + grammar, _ := p.Next() + if grammar == ErrorGrammar { + if tt.col == 0 { + test.T(t, p.Err(), io.EOF) + } else if perr, ok := p.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", p.Err()) + } + break + } + } + }) + } +} + +func TestStates(t *testing.T) { + var stateTests = []struct { + json string + expected []State + }{ + {"null", []State{ValueState}}, + {"[null]", []State{ArrayState, ArrayState, ValueState}}, + {"{\"\":null}", []State{ObjectKeyState, ObjectValueState, ObjectKeyState, ValueState}}, + } + for _, tt := range stateTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + i := 0 + for { + grammar, _ := p.Next() + state := p.State() + if grammar == ErrorGrammar { + test.T(t, p.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if grammar == WhitespaceGrammar { + continue + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected states size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, state, tt.expected[i], "states must match") + } + i++ + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewParser() { + p := NewParser(bytes.NewBufferString(`{"key": 5}`)) + out := "" + for { + state := p.State() + gt, data := p.Next() + if gt == ErrorGrammar { + break + } + out += string(data) + if state == ObjectKeyState && gt != EndObjectGrammar { + out += ":" + } + // not handling comma insertion + } + fmt.Println(out) + // Output: {"key":5} +} diff --git a/vendor/github.com/tdewolff/parse/position.go b/vendor/github.com/tdewolff/parse/position.go new file mode 100644 index 0000000..690fcfa --- /dev/null +++ b/vendor/github.com/tdewolff/parse/position.go @@ -0,0 +1,79 @@ +package parse + +import ( + "fmt" + "io" + "strings" + + "github.com/tdewolff/parse/buffer" +) + +// Position returns the line and column number for a certain position in 
a file. It is useful for recovering the position in a file that caused an error.
+// It only treats \n, \r, and \r\n as newlines; some languages also recognize \f, \u2028, and \u2029 as newlines.
+func Position(r io.Reader, offset int) (line, col int, context string, err error) {
+	l := buffer.NewLexer(r)
+
+	line = 1
+	for {
+		c := l.Peek(0)
+		if c == 0 {
+			col = l.Pos() + 1
+			context = positionContext(l, line, col)
+			err = l.Err()
+			if err == nil {
+				err = io.EOF
+			}
+			return
+		}
+
+		if offset == l.Pos() {
+			col = l.Pos() + 1
+			context = positionContext(l, line, col)
+			return
+		}
+
+		if c == '\n' {
+			l.Move(1)
+			line++
+			offset -= l.Pos()
+			l.Skip()
+		} else if c == '\r' {
+			if l.Peek(1) == '\n' {
+				if offset == l.Pos()+1 {
+					l.Move(1)
+					continue
+				}
+				l.Move(2)
+			} else {
+				l.Move(1)
+			}
+			line++
+			offset -= l.Pos()
+			l.Skip()
+		} else {
+			l.Move(1)
+		}
+	}
+}
+
+func positionContext(l *buffer.Lexer, line, col int) (context string) {
+	for {
+		c := l.Peek(0)
+		if c == 0 && l.Err() != nil || c == '\n' || c == '\r' {
+			break
+		}
+		l.Move(1)
+	}
+
+	// replace unprintable characters by a space
+	b := l.Lexeme()
+	for i, c := range b {
+		if c < 0x20 || c == 0x7F {
+			b[i] = ' '
+		}
+	}
+
+	context += fmt.Sprintf("%5d: %s\n", line, string(b))
+	context += fmt.Sprintf("%s^", strings.Repeat(" ", col+6))
+	return
+}
diff --git a/vendor/github.com/tdewolff/parse/position_test.go b/vendor/github.com/tdewolff/parse/position_test.go
new file mode 100644
index 0000000..eb1e4e5
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/position_test.go
@@ -0,0 +1,42 @@
+package parse
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestPosition(t *testing.T) {
+	var newlineTests = []struct {
+		offset int
+		buf    string
+		line   int
+		col    int
+		err    error
+	}{
+		{0, "x", 1, 1, nil},
+		{1, "xx", 1, 2, nil},
+		{2, "x\nx", 2, 1, nil},
+		{2, "\n\nx", 3, 1, nil},
+		{3, "\nxxx", 2, 3, nil},
+		{2, "\r\nx", 2, 1, nil},
+
+		// edge cases
+		{0, "", 1, 1, io.EOF},
+		{0, "\n", 1, 1, nil},
+		{1, "\r\n", 1, 2, nil},
+		{-1, "x", 1, 2, io.EOF}, // continue till the end
+	}
+	for _, tt := range newlineTests {
+		t.Run(fmt.Sprint(tt.buf, " ", tt.offset), func(t *testing.T) {
+			r := bytes.NewBufferString(tt.buf)
+			line, col, _, err := Position(r, tt.offset)
+			test.T(t, err, tt.err)
+			test.T(t, line, tt.line, "line")
+			test.T(t, col, tt.col, "column")
+		})
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/strconv/float.go b/vendor/github.com/tdewolff/parse/strconv/float.go
new file mode 100644
index 0000000..da1a30d
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/strconv/float.go
@@ -0,0 +1,251 @@
+package strconv // import "github.com/tdewolff/parse/strconv"
+
+import "math"
+
+var float64pow10 = []float64{
+	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+	1e20, 1e21, 1e22,
+}
+
+// ParseFloat parses a byte-slice and returns the float it represents.
+// If an invalid character is encountered, it will stop there.
+func ParseFloat(b []byte) (float64, int) {
+	i := 0
+	neg := false
+	if i < len(b) && (b[i] == '+' || b[i] == '-') {
+		neg = b[i] == '-'
+		i++
+	}
+
+	dot := -1
+	trunk := -1
+	n := uint64(0)
+	for ; i < len(b); i++ {
+		c := b[i]
+		if c >= '0' && c <= '9' {
+			if trunk == -1 {
+				if n > math.MaxUint64/10 {
+					trunk = i
+				} else {
+					n *= 10
+					n += uint64(c - '0')
+				}
+			}
+		} else if dot == -1 && c == '.'
{ + dot = i + } else { + break + } + } + + f := float64(n) + if neg { + f = -f + } + + mantExp := int64(0) + if dot != -1 { + if trunk == -1 { + trunk = i + } + mantExp = int64(trunk - dot - 1) + } else if trunk != -1 { + mantExp = int64(trunk - i) + } + expExp := int64(0) + if i < len(b) && (b[i] == 'e' || b[i] == 'E') { + i++ + if e, expLen := ParseInt(b[i:]); expLen > 0 { + expExp = e + i += expLen + } + } + exp := expExp - mantExp + + // copied from strconv/atof.go + if exp == 0 { + return f, i + } else if exp > 0 && exp <= 15+22 { // int * 10^k + // If exponent is big but number of digits is not, + // can move a few zeros into the integer part. + if exp > 22 { + f *= float64pow10[exp-22] + exp = 22 + } + if f <= 1e15 && f >= -1e15 { + return f * float64pow10[exp], i + } + } else if exp < 0 && exp >= -22 { // int / 10^k + return f / float64pow10[-exp], i + } + f *= math.Pow10(int(-mantExp)) + return f * math.Pow10(int(expExp)), i +} + +const log2 = 0.301029995 +const int64maxlen = 18 + +func float64exp(f float64) int { + exp2 := 0 + if f != 0.0 { + x := math.Float64bits(f) + exp2 = int(x>>(64-11-1))&0x7FF - 1023 + 1 + } + + exp10 := float64(exp2) * log2 + if exp10 < 0 { + exp10 -= 1.0 + } + return int(exp10) +} + +func AppendFloat(b []byte, f float64, prec int) ([]byte, bool) { + if math.IsNaN(f) || math.IsInf(f, 0) { + return b, false + } else if prec >= int64maxlen { + return b, false + } + + neg := false + if f < 0.0 { + f = -f + neg = true + } + if prec == -1 { + prec = int64maxlen - 1 + } + prec -= float64exp(f) // number of digits in front of the dot + f *= math.Pow10(prec) + + // calculate mantissa and exponent + mant := int64(f) + mantLen := LenInt(mant) + mantExp := mantLen - prec - 1 + if mant == 0 { + return append(b, '0'), true + } + + // expLen is zero for positive exponents, because positive exponents are determined later on in the big conversion loop + exp := 0 + expLen := 0 + if mantExp > 0 { + // positive exponent is determined in the loop below + // but if we initially decreased the exponent to fit in an integer, we can't set the new exponent in the loop alone, + // since the number of zeros at the end determines the positive exponent in the loop, and we just artificially lost zeros + if prec < 0 { + exp = mantExp + } + expLen = 1 + LenInt(int64(exp)) // e + digits + } else if mantExp < -3 { + exp = mantExp + expLen = 2 + LenInt(int64(exp)) // e + minus + digits + } else if mantExp < -1 { + mantLen += -mantExp - 1 // extra zero between dot and first digit + } + + // reserve space in b + i := len(b) + maxLen := 1 + mantLen + expLen // dot + mantissa digits + exponent + if neg { + maxLen++ + } + if i+maxLen > cap(b) { + b = append(b, make([]byte, maxLen)...) + } else { + b = b[:i+maxLen] + } + + // write to string representation + if neg { + b[i] = '-' + i++ + } + + // big conversion loop, start at the end and move to the front + // initially print trailing zeros and remove them later on + // for example if the first non-zero digit is three positions in front of the dot, it will overwrite the zeros with a positive exponent + zero := true + last := i + mantLen // right-most position of digit that is non-zero + dot + dot := last - prec - exp // position of dot + j := last + for mant > 0 { + if j == dot { + b[j] = '.' 
+ j-- + } + newMant := mant / 10 + digit := mant - 10*newMant + if zero && digit > 0 { + // first non-zero digit, if we are still behind the dot we can trim the end to this position + // otherwise trim to the dot (including the dot) + if j > dot { + i = j + 1 + // decrease negative exponent further to get rid of dot + if exp < 0 { + newExp := exp - (j - dot) + // getting rid of the dot shouldn't lower the exponent to more digits (e.g. -9 -> -10) + if LenInt(int64(newExp)) == LenInt(int64(exp)) { + exp = newExp + dot = j + j-- + i-- + } + } + } else { + i = dot + } + last = j + zero = false + } + b[j] = '0' + byte(digit) + j-- + mant = newMant + } + + if j > dot { + // extra zeros behind the dot + for j > dot { + b[j] = '0' + j-- + } + b[j] = '.' + } else if last+3 < dot { + // add positive exponent because we have 3 or more zeros in front of the dot + i = last + 1 + exp = dot - last - 1 + } else if j == dot { + // handle 0.1 + b[j] = '.' + } + + // exponent + if exp != 0 { + if exp == 1 { + b[i] = '0' + i++ + } else if exp == 2 { + b[i] = '0' + b[i+1] = '0' + i += 2 + } else { + b[i] = 'e' + i++ + if exp < 0 { + b[i] = '-' + i++ + exp = -exp + } + i += LenInt(int64(exp)) + j := i + for exp > 0 { + newExp := exp / 10 + digit := exp - 10*newExp + j-- + b[j] = '0' + byte(digit) + exp = newExp + } + } + } + return b[:i], true +} diff --git a/vendor/github.com/tdewolff/parse/strconv/float_test.go b/vendor/github.com/tdewolff/parse/strconv/float_test.go new file mode 100644 index 0000000..b1f2cfb --- /dev/null +++ b/vendor/github.com/tdewolff/parse/strconv/float_test.go @@ -0,0 +1,196 @@ +package strconv // import "github.com/tdewolff/parse/strconv" + +import ( + "fmt" + "math" + "math/rand" + "strconv" + "testing" + + "github.com/tdewolff/test" +) + +func TestParseFloat(t *testing.T) { + floatTests := []struct { + f string + expected float64 + }{ + {"5", 5}, + {"5.1", 5.1}, + {"-5.1", -5.1}, + {"5.1e-2", 5.1e-2}, + {"5.1e+2", 5.1e+2}, + {"0.0e1", 0.0e1}, + {"18446744073709551620", 18446744073709551620.0}, + {"1e23", 1e23}, + // TODO: hard to test due to float imprecision + // {"1.7976931348623e+308", 1.7976931348623e+308) + // {"4.9406564584124e-308", 4.9406564584124e-308) + } + for _, tt := range floatTests { + f, n := ParseFloat([]byte(tt.f)) + test.That(t, n == len(tt.f), "parsed", n, "characters instead for", tt.f) + test.That(t, f == tt.expected, "return", tt.expected, "for", tt.f) + } +} + +func TestAppendFloat(t *testing.T) { + floatTests := []struct { + f float64 + prec int + expected string + }{ + {0, 6, "0"}, + {1, 6, "1"}, + {9, 6, "9"}, + {9.99999, 6, "9.99999"}, + {123, 6, "123"}, + {0.123456, 6, ".123456"}, + {0.066, 6, ".066"}, + {0.0066, 6, ".0066"}, + {12e2, 6, "1200"}, + {12e3, 6, "12e3"}, + {0.1, 6, ".1"}, + {0.001, 6, ".001"}, + {0.0001, 6, "1e-4"}, + {-1, 6, "-1"}, + {-123, 6, "-123"}, + {-123.456, 6, "-123.456"}, + {-12e3, 6, "-12e3"}, + {-0.1, 6, "-.1"}, + {-0.0001, 6, "-1e-4"}, + {0.000100009, 10, "100009e-9"}, + {0.0001000009, 10, "1.000009e-4"}, + {1e18, 0, "1e18"}, + //{1e19, 0, "1e19"}, + //{1e19, 18, "1e19"}, + {1e1, 0, "10"}, + {1e2, 1, "100"}, + {1e3, 2, "1e3"}, + {1e10, -1, "1e10"}, + {1e15, -1, "1e15"}, + {1e-5, 6, "1e-5"}, + {math.NaN(), 0, ""}, + {math.Inf(1), 0, ""}, + {math.Inf(-1), 0, ""}, + {0, 19, ""}, + {.000923361977200859392, -1, "9.23361977200859392e-4"}, + } + for _, tt := range floatTests { + f, _ := AppendFloat([]byte{}, tt.f, tt.prec) + test.String(t, string(f), tt.expected, "for", tt.f) + } + + b := make([]byte, 0, 22) + AppendFloat(b, 12.34, -1) 
+	test.String(t, string(b[:5]), "12.34", "in buffer")
+}
+
+////////////////////////////////////////////////////////////////
+
+func TestAppendFloatRandom(t *testing.T) {
+	N := int(1e6)
+	if testing.Short() {
+		N = 0
+	}
+	r := rand.New(rand.NewSource(99))
+	//prec := 10
+	for i := 0; i < N; i++ {
+		f := r.ExpFloat64()
+		//f = math.Floor(f*float64(prec)) / float64(prec)
+
+		b, _ := AppendFloat([]byte{}, f, -1)
+		f2, _ := strconv.ParseFloat(string(b), 64)
+		if math.Abs(f-f2) > 1e-6 {
+			fmt.Println("Bad:", f, "!=", f2, "in", string(b))
+		}
+	}
+}
+
+func BenchmarkFloatToBytes1(b *testing.B) {
+	r := []byte{} //make([]byte, 10)
+	f := 123.456
+	for i := 0; i < b.N; i++ {
+		r = strconv.AppendFloat(r[:0], f, 'g', 6, 64)
+	}
+}
+
+func BenchmarkFloatToBytes2(b *testing.B) {
+	r := make([]byte, 10)
+	f := 123.456
+	for i := 0; i < b.N; i++ {
+		r, _ = AppendFloat(r[:0], f, 6)
+	}
+}
+
+func BenchmarkModf1(b *testing.B) {
+	f := 123.456
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		a, b := math.Modf(f)
+		x += a + b
+	}
+}
+
+func BenchmarkModf2(b *testing.B) {
+	f := 123.456
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		a := float64(int64(f))
+		b := f - a
+		x += a + b
+	}
+}
+
+func BenchmarkPrintInt1(b *testing.B) {
+	X := int64(123456789)
+	n := LenInt(X)
+	r := make([]byte, n)
+	for i := 0; i < b.N; i++ {
+		x := X
+		j := n
+		for x > 0 {
+			j--
+			r[j] = '0' + byte(x%10)
+			x /= 10
+		}
+	}
+}
+
+func BenchmarkPrintInt2(b *testing.B) {
+	X := int64(123456789)
+	n := LenInt(X)
+	r := make([]byte, n)
+	for i := 0; i < b.N; i++ {
+		x := X
+		j := n
+		for x > 0 {
+			j--
+			newX := x / 10
+			r[j] = '0' + byte(x-10*newX)
+			x = newX
+		}
+	}
+}
+
+var int64pow10 = []int64{
+	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18,
+}
+
+func BenchmarkPrintInt3(b *testing.B) {
+	X := int64(123456789)
+	n := LenInt(X)
+	r := make([]byte, n)
+	for i := 0; i < b.N; i++ {
+		x := X
+		j := 0
+		for j < n {
+			pow := int64pow10[n-j-1]
+			tmp := x / pow
+			r[j] = '0' + byte(tmp)
+			j++
+			x -= tmp * pow
+		}
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/strconv/int.go b/vendor/github.com/tdewolff/parse/strconv/int.go
new file mode 100644
index 0000000..7101daa
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/strconv/int.go
@@ -0,0 +1,78 @@
+package strconv // import "github.com/tdewolff/parse/strconv"
+
+import "math"
+
+// ParseInt parses a byte-slice and returns the integer it represents.
+// If an invalid character is encountered, it will stop there.
+func ParseInt(b []byte) (int64, int) {
+	i := 0
+	neg := false
+	if len(b) > 0 && (b[0] == '+' || b[0] == '-') {
+		neg = b[0] == '-'
+		i++
+	}
+	n := uint64(0)
+	for i < len(b) {
+		c := b[i]
+		if n > math.MaxUint64/10 {
+			return 0, 0
+		} else if c >= '0' && c <= '9' {
+			n *= 10
+			n += uint64(c - '0')
+		} else {
+			break
+		}
+		i++
+	}
+	if !neg && n > uint64(math.MaxInt64) || n > uint64(math.MaxInt64)+1 {
+		return 0, 0
+	} else if neg {
+		return -int64(n), i
+	}
+	return int64(n), i
+}
+
+// LenInt returns the number of decimal digits in i; a minus sign is not counted.
+func LenInt(i int64) int {
+	if i < 0 {
+		i = -i
+	}
+	switch {
+	case i < 10:
+		return 1
+	case i < 100:
+		return 2
+	case i < 1000:
+		return 3
+	case i < 10000:
+		return 4
+	case i < 100000:
+		return 5
+	case i < 1000000:
+		return 6
+	case i < 10000000:
+		return 7
+	case i < 100000000:
+		return 8
+	case i < 1000000000:
+		return 9
+	case i < 10000000000:
+		return 10
+	case i < 100000000000:
+		return 11
+	case i < 1000000000000:
+		return 12
+	case i < 10000000000000:
+		return 13
+	case i < 100000000000000:
+		return 14
+	case i < 1000000000000000:
+		return 15
+	case i < 10000000000000000:
+		return 16
+	case i < 100000000000000000:
+		return 17
+	case i < 1000000000000000000:
+		return 18
+	}
+	return 19
+}
diff --git a/vendor/github.com/tdewolff/parse/strconv/int_test.go b/vendor/github.com/tdewolff/parse/strconv/int_test.go new file mode 100644 index 0000000..1719f45 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/strconv/int_test.go @@ -0,0 +1,95 @@ +package strconv // import "github.com/tdewolff/parse/strconv"
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestParseInt(t *testing.T) {
+	intTests := []struct {
+		i        string
+		expected int64
+	}{
+		{"5", 5},
+		{"99", 99},
+		{"999", 999},
+		{"-5", -5},
+		{"+5", 5},
+		{"9223372036854775807", 9223372036854775807},
+		{"9223372036854775808", 0},
+		{"-9223372036854775807", -9223372036854775807},
+		{"-9223372036854775808", -9223372036854775808},
+		{"-9223372036854775809", 0},
+		{"18446744073709551620", 0},
+		{"a", 0},
+	}
+	for _, tt := range intTests {
+		i, _ := ParseInt([]byte(tt.i))
+		test.That(t, i == tt.expected, "return", tt.expected, "for", tt.i)
+	}
+}
+
+func TestLenInt(t *testing.T) {
+	lenIntTests := []struct {
+		number   int64
+		expected int
+	}{
+		{0, 1},
+		{1, 1},
+		{10, 2},
+		{99, 2},
+
+		// coverage
+		{100, 3},
+		{1000, 4},
+		{10000, 5},
+		{100000, 6},
+		{1000000, 7},
+		{10000000, 8},
+		{100000000, 9},
+		{1000000000, 10},
+		{10000000000, 11},
+		{100000000000, 12},
+		{1000000000000, 13},
+		{10000000000000, 14},
+		{100000000000000, 15},
+		{1000000000000000, 16},
+		{10000000000000000, 17},
+		{100000000000000000, 18},
+		{1000000000000000000, 19},
+	}
+	for _, tt := range lenIntTests {
+		test.That(t, LenInt(tt.number) == tt.expected, "return", tt.expected, "for", tt.number)
+	}
+}
+
+////////////////////////////////////////////////////////////////
+
+var num []int64
+
+// TestMain is an ordinary test, not the testing.M entry point; it fills num
+// with input for the benchmarks below.
+func TestMain(t *testing.T) {
+	for j := 0; j < 1000; j++ {
+		num = append(num, rand.Int63n(1000))
+	}
+}
+
+func BenchmarkLenIntLog(b *testing.B) {
+	n := 0
+	for i := 0; i < b.N; i++ {
+		for j := 0; j < 1000; j++ {
+			n += int(math.Log10(math.Abs(float64(num[j])))) + 1
+		}
+	}
+}
+
+func BenchmarkLenIntSwitch(b *testing.B) {
+	n := 0
+	for i := 0; i < b.N; i++ {
+		for j := 0; j < 1000; j++ {
+			n += LenInt(num[j])
+		}
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/svg/hash.go b/vendor/github.com/tdewolff/parse/svg/hash.go new file mode 100644 index 0000000..58528aa --- /dev/null +++ b/vendor/github.com/tdewolff/parse/svg/hash.go @@ -0,0
+1,295 @@ +package svg + +// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate + +// uses github.com/tdewolff/hasher +//go:generate hasher -type=Hash -file=hash.go + +// Hash defines perfect hashes for a predefined list of strings +type Hash uint32 + +// Unique hash definitions to be used instead of strings +const ( + A Hash = 0x101 // a + Alignment_Baseline Hash = 0x2e12 // alignment-baseline + BaseProfile Hash = 0xb // baseProfile + Baseline_Shift Hash = 0x380e // baseline-shift + Buffered_Rendering Hash = 0x5212 // buffered-rendering + Clip Hash = 0x6404 // clip + Clip_Path Hash = 0x6409 // clip-path + Clip_Rule Hash = 0x8009 // clip-rule + Color Hash = 0xd805 // color + Color_Interpolation Hash = 0xd813 // color-interpolation + Color_Interpolation_Filters Hash = 0xd81b // color-interpolation-filters + Color_Profile Hash = 0x1ea0d // color-profile + Color_Rendering Hash = 0x2250f // color-rendering + ContentScriptType Hash = 0xa011 // contentScriptType + ContentStyleType Hash = 0xb110 // contentStyleType + Cursor Hash = 0xc106 // cursor + D Hash = 0x5901 // d + Defs Hash = 0x35c04 // defs + Direction Hash = 0x2ff09 // direction + Display Hash = 0x9807 // display + Dominant_Baseline Hash = 0x18511 // dominant-baseline + Enable_Background Hash = 0x8811 // enable-background + FeImage Hash = 0x14507 // feImage + Fill Hash = 0xc904 // fill + Fill_Opacity Hash = 0x3300c // fill-opacity + Fill_Rule Hash = 0xc909 // fill-rule + Filter Hash = 0xec06 // filter + Flood_Color Hash = 0xd20b // flood-color + Flood_Opacity Hash = 0x1050d // flood-opacity + Font Hash = 0x11404 // font + Font_Family Hash = 0x1140b // font-family + Font_Size Hash = 0x11f09 // font-size + Font_Size_Adjust Hash = 0x11f10 // font-size-adjust + Font_Stretch Hash = 0x1370c // font-stretch + Font_Style Hash = 0x14c0a // font-style + Font_Variant Hash = 0x1560c // font-variant + Font_Weight Hash = 0x1620b // font-weight + G Hash = 0x1601 // g + Glyph_Orientation_Horizontal Hash = 0x1c61c // glyph-orientation-horizontal + Glyph_Orientation_Vertical Hash = 0x161a // glyph-orientation-vertical + Height Hash = 0x6c06 // height + Href Hash = 0x14204 // href + Image Hash = 0x16d05 // image + Image_Rendering Hash = 0x16d0f // image-rendering + Kerning Hash = 0x1af07 // kerning + Letter_Spacing Hash = 0x90e // letter-spacing + Lighting_Color Hash = 0x1e10e // lighting-color + Line Hash = 0x3c04 // line + Marker Hash = 0x17c06 // marker + Marker_End Hash = 0x17c0a // marker-end + Marker_Mid Hash = 0x1960a // marker-mid + Marker_Start Hash = 0x1a00c // marker-start + Mask Hash = 0x1ac04 // mask + Metadata Hash = 0x1b608 // metadata + Missing_Glyph Hash = 0x1be0d // missing-glyph + Opacity Hash = 0x10b07 // opacity + Overflow Hash = 0x25508 // overflow + Paint_Order Hash = 0x2a10b // paint-order + Path Hash = 0x6904 // path + Pattern Hash = 0x1f707 // pattern + Pointer_Events Hash = 0x1fe0e // pointer-events + Points Hash = 0x21a06 // points + Polygon Hash = 0x23407 // polygon + Polyline Hash = 0x23b08 // polyline + PreserveAspectRatio Hash = 0x24313 // preserveAspectRatio + Rect Hash = 0x30104 // rect + Rx Hash = 0x4f02 // rx + Ry Hash = 0xc602 // ry + Script Hash = 0xf206 // script + Shape_Rendering Hash = 0x20b0f // shape-rendering + Solid_Color Hash = 0x21f0b // solid-color + Solid_Opacity Hash = 0x35f0d // solid-opacity + Stop_Color Hash = 0x12d0a // stop-color + Stop_Opacity Hash = 0x2670c // stop-opacity + Stroke Hash = 0x27306 // stroke + Stroke_Dasharray 
Hash = 0x27310 // stroke-dasharray + Stroke_Dashoffset Hash = 0x28311 // stroke-dashoffset + Stroke_Linecap Hash = 0x2940e // stroke-linecap + Stroke_Linejoin Hash = 0x2ac0f // stroke-linejoin + Stroke_Miterlimit Hash = 0x2bb11 // stroke-miterlimit + Stroke_Opacity Hash = 0x2cc0e // stroke-opacity + Stroke_Width Hash = 0x2da0c // stroke-width + Style Hash = 0x15105 // style + Svg Hash = 0x2e603 // svg + Switch Hash = 0x2e906 // switch + Symbol Hash = 0x2ef06 // symbol + Text_Anchor Hash = 0x450b // text-anchor + Text_Decoration Hash = 0x710f // text-decoration + Text_Rendering Hash = 0xf70e // text-rendering + Type Hash = 0x11004 // type + Unicode_Bidi Hash = 0x2f50c // unicode-bidi + Use Hash = 0x30803 // use + Vector_Effect Hash = 0x30b0d // vector-effect + Version Hash = 0x31807 // version + ViewBox Hash = 0x31f07 // viewBox + Viewport_Fill Hash = 0x3270d // viewport-fill + Viewport_Fill_Opacity Hash = 0x32715 // viewport-fill-opacity + Visibility Hash = 0x33c0a // visibility + White_Space Hash = 0x25c0b // white-space + Width Hash = 0x2e105 // width + Word_Spacing Hash = 0x3460c // word-spacing + Writing_Mode Hash = 0x3520c // writing-mode + X Hash = 0x4701 // x + X1 Hash = 0x5002 // x1 + X2 Hash = 0x32502 // x2 + Xml_Space Hash = 0x36c09 // xml:space + Y Hash = 0x1801 // y + Y1 Hash = 0x9e02 // y1 + Y2 Hash = 0xc702 // y2 +) + +// String returns the hash' name. +func (i Hash) String() string { + start := uint32(i >> 8) + n := uint32(i & 0xff) + if start+n > uint32(len(_Hash_text)) { + return "" + } + return _Hash_text[start : start+n] +} + +// ToHash returns the hash whose name is s. It returns zero if there is no +// such hash. It is case sensitive. +func ToHash(s []byte) Hash { + if len(s) == 0 || len(s) > _Hash_maxLen { + return 0 + } + h := uint32(_Hash_hash0) + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + goto NEXT + } + } + return i + } +NEXT: + if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { + t := _Hash_text[i>>8 : i>>8+i&0xff] + for i := 0; i < len(s); i++ { + if t[i] != s[i] { + return 0 + } + } + return i + } + return 0 +} + +const _Hash_hash0 = 0x30372d7b +const _Hash_maxLen = 28 +const _Hash_text = "baseProfiletter-spacinglyph-orientation-verticalignment-base" + + "line-shiftext-anchorx1buffered-renderingclip-patheightext-de" + + "corationclip-rulenable-backgroundisplay1contentScriptTypecon" + + "tentStyleTypecursory2fill-ruleflood-color-interpolation-filt" + + "erscriptext-renderingflood-opacitypefont-familyfont-size-adj" + + "ustop-colorfont-stretchrefeImagefont-stylefont-variantfont-w" + + "eightimage-renderingmarker-endominant-baselinemarker-midmark" + + "er-startmaskerningmetadatamissing-glyph-orientation-horizont" + + "alighting-color-profilepatternpointer-eventshape-renderingpo" + + "intsolid-color-renderingpolygonpolylinepreserveAspectRatiove" + + "rflowhite-spacestop-opacitystroke-dasharraystroke-dashoffset" + + "stroke-linecapaint-orderstroke-linejoinstroke-miterlimitstro" + + "ke-opacitystroke-widthsvgswitchsymbolunicode-bidirectionusev" + + "ector-effectversionviewBox2viewport-fill-opacityvisibilitywo" + + "rd-spacingwriting-modefsolid-opacityxml:space" + +var _Hash_table = [1 << 7]Hash{ + 0x0: 0x2940e, // stroke-linecap + 0x1: 0x1140b, // font-family + 0x2: 0x23b08, // polyline + 0x3: 0x1f707, // pattern + 0x4: 0x30104, // rect + 0x5: 
0x5212, // buffered-rendering + 0x7: 0x2f50c, // unicode-bidi + 0x8: 0x450b, // text-anchor + 0x9: 0x2bb11, // stroke-miterlimit + 0xa: 0xc909, // fill-rule + 0xb: 0x27310, // stroke-dasharray + 0xc: 0xc904, // fill + 0xd: 0x1af07, // kerning + 0xe: 0x2670c, // stop-opacity + 0x10: 0x1a00c, // marker-start + 0x11: 0x380e, // baseline-shift + 0x14: 0x17c0a, // marker-end + 0x15: 0x18511, // dominant-baseline + 0x16: 0xc602, // ry + 0x17: 0x161a, // glyph-orientation-vertical + 0x18: 0x5002, // x1 + 0x19: 0x20b0f, // shape-rendering + 0x1a: 0x32502, // x2 + 0x1b: 0x11f10, // font-size-adjust + 0x1c: 0x2250f, // color-rendering + 0x1d: 0x28311, // stroke-dashoffset + 0x1f: 0x3520c, // writing-mode + 0x20: 0x2e906, // switch + 0x21: 0xf70e, // text-rendering + 0x22: 0x23407, // polygon + 0x23: 0x3460c, // word-spacing + 0x24: 0x21f0b, // solid-color + 0x25: 0xec06, // filter + 0x26: 0x1801, // y + 0x27: 0x1be0d, // missing-glyph + 0x29: 0x11404, // font + 0x2a: 0x4f02, // rx + 0x2b: 0x9807, // display + 0x2c: 0x2e603, // svg + 0x2d: 0x1050d, // flood-opacity + 0x2f: 0x14204, // href + 0x30: 0x6404, // clip + 0x31: 0x3c04, // line + 0x32: 0x1620b, // font-weight + 0x33: 0x1c61c, // glyph-orientation-horizontal + 0x34: 0x6c06, // height + 0x35: 0x9e02, // y1 + 0x36: 0x6904, // path + 0x37: 0x31807, // version + 0x38: 0x2ac0f, // stroke-linejoin + 0x39: 0x4701, // x + 0x3a: 0x30803, // use + 0x3b: 0x2cc0e, // stroke-opacity + 0x3c: 0x15105, // style + 0x3d: 0x30b0d, // vector-effect + 0x3e: 0x14c0a, // font-style + 0x40: 0x16d05, // image + 0x41: 0x1e10e, // lighting-color + 0x42: 0xd813, // color-interpolation + 0x43: 0x27306, // stroke + 0x44: 0x2ef06, // symbol + 0x47: 0x8811, // enable-background + 0x48: 0x33c0a, // visibility + 0x49: 0x25508, // overflow + 0x4b: 0x31f07, // viewBox + 0x4c: 0x2e12, // alignment-baseline + 0x4d: 0x5901, // d + 0x4e: 0x1560c, // font-variant + 0x4f: 0x1ac04, // mask + 0x50: 0x21a06, // points + 0x51: 0x1b608, // metadata + 0x52: 0x710f, // text-decoration + 0x53: 0xd81b, // color-interpolation-filters + 0x54: 0x2ff09, // direction + 0x55: 0x6409, // clip-path + 0x56: 0x2da0c, // stroke-width + 0x59: 0x35f0d, // solid-opacity + 0x5a: 0xd805, // color + 0x5b: 0xd20b, // flood-color + 0x5c: 0x1601, // g + 0x5d: 0x2e105, // width + 0x5e: 0x1ea0d, // color-profile + 0x61: 0x35c04, // defs + 0x62: 0x1370c, // font-stretch + 0x63: 0x11004, // type + 0x64: 0x8009, // clip-rule + 0x66: 0x24313, // preserveAspectRatio + 0x67: 0x14507, // feImage + 0x68: 0x36c09, // xml:space + 0x69: 0xc106, // cursor + 0x6a: 0x16d0f, // image-rendering + 0x6b: 0x90e, // letter-spacing + 0x6c: 0xf206, // script + 0x6d: 0x12d0a, // stop-color + 0x6e: 0x101, // a + 0x70: 0x10b07, // opacity + 0x71: 0xb110, // contentStyleType + 0x72: 0x1fe0e, // pointer-events + 0x73: 0xb, // baseProfile + 0x74: 0x11f09, // font-size + 0x75: 0x3270d, // viewport-fill + 0x76: 0x3300c, // fill-opacity + 0x77: 0x25c0b, // white-space + 0x79: 0x17c06, // marker + 0x7b: 0x2a10b, // paint-order + 0x7c: 0xc702, // y2 + 0x7d: 0x32715, // viewport-fill-opacity + 0x7e: 0x1960a, // marker-mid + 0x7f: 0xa011, // contentScriptType +} diff --git a/vendor/github.com/tdewolff/parse/svg/hash_test.go b/vendor/github.com/tdewolff/parse/svg/hash_test.go new file mode 100644 index 0000000..7038a15 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/svg/hash_test.go @@ -0,0 +1,17 @@ +package svg // import "github.com/tdewolff/parse/svg" + +import ( + "testing" + + "github.com/tdewolff/test" +) + +func TestHashTable(t 
*testing.T) {
+	test.T(t, ToHash([]byte("svg")), Svg, "'svg' must resolve to hash.Svg")
+	test.T(t, ToHash([]byte("width")), Width, "'width' must resolve to hash.Width")
+	test.T(t, Svg.String(), "svg")
+	test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero")
+	test.T(t, Hash(0xffffff).String(), "")
+	test.T(t, ToHash([]byte("svgs")), Hash(0), "'svgs' must resolve to zero")
+	test.T(t, ToHash([]byte("uopi")), Hash(0), "'uopi' must resolve to zero")
+}
diff --git a/vendor/github.com/tdewolff/parse/util.go b/vendor/github.com/tdewolff/parse/util.go new file mode 100644 index 0000000..83509a1 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/util.go @@ -0,0 +1,196 @@ +package parse // import "github.com/tdewolff/parse"
+
+// Copy returns a copy of the given byte slice.
+func Copy(src []byte) (dst []byte) {
+	dst = make([]byte, len(src))
+	copy(dst, src)
+	return
+}
+
+// ToLower converts all characters in the byte slice from A-Z to a-z; note that it modifies the slice in place.
+func ToLower(src []byte) []byte {
+	for i, c := range src {
+		if c >= 'A' && c <= 'Z' {
+			src[i] = c + ('a' - 'A')
+		}
+	}
+	return src
+}
+
+// EqualFold returns true when s matches targetLower case-insensitively; targetLower must be all-lowercase.
+func EqualFold(s, targetLower []byte) bool {
+	if len(s) != len(targetLower) {
+		return false
+	}
+	for i, c := range targetLower {
+		// when c is not a lowercase letter, s[i] must match it exactly
+		if s[i] != c && (c < 'a' || c > 'z' || s[i]+('a'-'A') != c) {
+			return false
+		}
+	}
+	return true
+}
+
+var whitespaceTable = [256]bool{
+	// ASCII
+	false, false, false, false, false, false, false, false,
+	false, true, true, false, true, true, false, false, // tab, new line, form feed, carriage return
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	true, false, false, false, false, false, false, false, // space
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	// non-ASCII
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+}
+
+// IsWhitespace returns true for space, \n, \r, \t, \f.
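+// The check is backed by the 256-entry lookup table above; see the whitespace
+// benchmarks in util_test.go for a comparison against chained comparisons.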
+func IsWhitespace(c byte) bool {
+	return whitespaceTable[c]
+}
+
+var newlineTable = [256]bool{
+	// ASCII
+	false, false, false, false, false, false, false, false,
+	false, false, true, false, false, true, false, false, // new line, carriage return
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	// non-ASCII
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+	false, false, false, false, false, false, false, false,
+}
+
+// IsNewline returns true for \n, \r.
+func IsNewline(c byte) bool {
+	return newlineTable[c]
+}
+
+// IsAllWhitespace returns true when the entire byte slice consists of space, \n, \r, \t, \f.
+func IsAllWhitespace(b []byte) bool {
+	for _, c := range b {
+		if !IsWhitespace(c) {
+			return false
+		}
+	}
+	return true
+}
+
+// TrimWhitespace removes any leading and trailing whitespace characters.
+func TrimWhitespace(b []byte) []byte {
+	n := len(b)
+	start := n
+	for i := 0; i < n; i++ {
+		if !IsWhitespace(b[i]) {
+			start = i
+			break
+		}
+	}
+	end := n
+	for i := n - 1; i >= start; i-- {
+		if !IsWhitespace(b[i]) {
+			end = i + 1
+			break
+		}
+	}
+	return b[start:end]
+}
+
+// ReplaceMultipleWhitespace replaces any series of whitespace characters (space, \n, \t, \f, \r) with a single space, or with a single newline when the series contained a \n or \r.
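+// For example, "a \t b" becomes "a b", while "a \r\n b" becomes "a\nb".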
+func ReplaceMultipleWhitespace(b []byte) []byte { + j := 0 + prevWS := false + hasNewline := false + for i, c := range b { + if IsWhitespace(c) { + prevWS = true + if IsNewline(c) { + hasNewline = true + } + } else { + if prevWS { + prevWS = false + if hasNewline { + hasNewline = false + b[j] = '\n' + } else { + b[j] = ' ' + } + j++ + } + b[j] = b[i] + j++ + } + } + if prevWS { + if hasNewline { + b[j] = '\n' + } else { + b[j] = ' ' + } + j++ + } + return b[:j] +} diff --git a/vendor/github.com/tdewolff/parse/util_test.go b/vendor/github.com/tdewolff/parse/util_test.go new file mode 100644 index 0000000..c08c124 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/util_test.go @@ -0,0 +1,176 @@ +package parse // import "github.com/tdewolff/parse" + +import ( + "bytes" + "math/rand" + "regexp" + "testing" + + "github.com/tdewolff/test" +) + +func helperRand(n, m int, chars []byte) [][]byte { + r := make([][]byte, n) + for i := range r { + for j := 0; j < m; j++ { + r[i] = append(r[i], chars[rand.Intn(len(chars))]) + } + } + return r +} + +//////////////////////////////////////////////////////////////// + +var wsSlices [][]byte + +func init() { + wsSlices = helperRand(100, 20, []byte("abcdefg \n\r\f\t")) +} + +func TestCopy(t *testing.T) { + foo := []byte("abc") + bar := Copy(foo) + foo[0] = 'b' + test.String(t, string(foo), "bbc") + test.String(t, string(bar), "abc") +} + +func TestToLower(t *testing.T) { + foo := []byte("Abc") + bar := ToLower(foo) + bar[1] = 'B' + test.String(t, string(foo), "aBc") + test.String(t, string(bar), "aBc") +} + +func TestEqualFold(t *testing.T) { + test.That(t, EqualFold([]byte("Abc"), []byte("abc"))) + test.That(t, !EqualFold([]byte("Abcd"), []byte("abc"))) + test.That(t, !EqualFold([]byte("Bbc"), []byte("abc"))) +} + +func TestWhitespace(t *testing.T) { + test.That(t, IsAllWhitespace([]byte("\t \r\n\f"))) + test.That(t, !IsAllWhitespace([]byte("\t \r\n\fx"))) +} + +func TestReplaceMultipleWhitespace(t *testing.T) { + wsRegexp := regexp.MustCompile("[ \t\f]+") + wsNewlinesRegexp := regexp.MustCompile("[ ]*[\r\n][ \r\n]*") + for _, e := range wsSlices { + reference := wsRegexp.ReplaceAll(e, []byte(" ")) + reference = wsNewlinesRegexp.ReplaceAll(reference, []byte("\n")) + test.Bytes(t, ReplaceMultipleWhitespace(e), reference, "must remove all multiple whitespace but keep newlines") + } +} + +func TestTrim(t *testing.T) { + test.Bytes(t, TrimWhitespace([]byte("a")), []byte("a")) + test.Bytes(t, TrimWhitespace([]byte(" a")), []byte("a")) + test.Bytes(t, TrimWhitespace([]byte("a ")), []byte("a")) + test.Bytes(t, TrimWhitespace([]byte(" ")), []byte("")) +} + +//////////////////////////////////////////////////////////////// + +func BenchmarkBytesTrim(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, e := range wsSlices { + bytes.TrimSpace(e) + } + } +} + +func BenchmarkTrim(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, e := range wsSlices { + TrimWhitespace(e) + } + } +} + +func BenchmarkReplace(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, e := range wsSlices { + ReplaceMultipleWhitespace(e) + } + } +} + +func BenchmarkWhitespaceTable(b *testing.B) { + n := 0 + for i := 0; i < b.N; i++ { + for _, e := range wsSlices { + for _, c := range e { + if IsWhitespace(c) { + n++ + } + } + } + } +} + +func BenchmarkWhitespaceIf1(b *testing.B) { + n := 0 + for i := 0; i < b.N; i++ { + for _, e := range wsSlices { + for _, c := range e { + if c == ' ' { + n++ + } + } + } + } +} + +func BenchmarkWhitespaceIf2(b *testing.B) { + n := 0 + for i := 0; i < b.N; 
i++ {
+		for _, e := range wsSlices {
+			for _, c := range e {
+				if c == ' ' || c == '\n' {
+					n++
+				}
+			}
+		}
+	}
+}
+
+func BenchmarkWhitespaceIf3(b *testing.B) {
+	n := 0
+	for i := 0; i < b.N; i++ {
+		for _, e := range wsSlices {
+			for _, c := range e {
+				if c == ' ' || c == '\n' || c == '\r' {
+					n++
+				}
+			}
+		}
+	}
+}
+
+func BenchmarkWhitespaceIf4(b *testing.B) {
+	n := 0
+	for i := 0; i < b.N; i++ {
+		for _, e := range wsSlices {
+			for _, c := range e {
+				if c == ' ' || c == '\n' || c == '\r' || c == '\t' {
+					n++
+				}
+			}
+		}
+	}
+}
+
+func BenchmarkWhitespaceIf5(b *testing.B) {
+	n := 0
+	for i := 0; i < b.N; i++ {
+		for _, e := range wsSlices {
+			for _, c := range e {
+				if c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\f' {
+					n++
+				}
+			}
+		}
+	}
+}
diff --git a/vendor/github.com/tdewolff/parse/xml/README.md b/vendor/github.com/tdewolff/parse/xml/README.md new file mode 100644 index 0000000..3aaf6f4 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/README.md @@ -0,0 +1,101 @@ +# XML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/xml?status.svg)](http://godoc.org/github.com/tdewolff/parse/xml) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/xml)](http://gocover.io/github.com/tdewolff/parse/xml)
+
+This package is an XML lexer written in [Go][1]. It follows the specification at [Extensible Markup Language (XML) 1.0 (Fifth Edition)](http://www.w3.org/TR/REC-xml/). The lexer takes an io.Reader and converts it into tokens until EOF.
+
+## Installation
+Run the following command
+
+	go get github.com/tdewolff/parse/xml
+
+or add the following import and run the project with `go get`
+
+	import "github.com/tdewolff/parse/xml"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := xml.NewLexer(r)
+```
+
+To tokenize until EOF or an error occurs, use:
+``` go
+for {
+	tt, data := l.Next()
+	switch tt {
+	case xml.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	case xml.StartTagToken:
+		// ...
+		for {
+			ttAttr, dataAttr := l.Next()
+			if ttAttr != xml.AttributeToken {
+				// handle StartTagCloseToken/StartTagCloseVoidToken/StartTagClosePIToken
+				break
+			}
+			// ...
+		}
+	case xml.EndTagToken:
+		// ...
+	}
+}
+```
+
+All tokens (matching the TokenType constants in lex.go):
+``` go
+ErrorToken TokenType = iota // extra token when errors occur
+CommentToken
+DOCTYPEToken
+CDATAToken
+StartTagToken
+StartTagPIToken
+StartTagCloseToken
+StartTagCloseVoidToken
+StartTagClosePIToken
+EndTagToken
+AttributeToken
+TextToken
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/xml"
+)
+
+// Tokenize XML from stdin.
+func main() {
+	l := xml.NewLexer(os.Stdin)
+	for {
+		tt, data := l.Next()
+		switch tt {
+		case xml.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error:", l.Err())
+			}
+			return
+		case xml.StartTagToken:
+			fmt.Println("Tag", string(data))
+			for {
+				ttAttr, dataAttr := l.Next()
+				if ttAttr != xml.AttributeToken {
+					break
+				}
+
+				key := dataAttr
+				val := l.AttrVal()
+				fmt.Println("Attribute", string(key), "=", string(val))
+			}
+			// ...
+		}
+	}
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/xml/lex.go b/vendor/github.com/tdewolff/parse/xml/lex.go new file mode 100644 index 0000000..0f1393c --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/lex.go @@ -0,0 +1,345 @@ +// Package xml is an XML 1.0 lexer following the specification at http://www.w3.org/TR/xml/.
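+// Use NewLexer to wrap an io.Reader and call Next repeatedly until an ErrorToken
+// is returned; Err then reports whether the input ended (io.EOF) or lexing failed.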
+package xml // import "github.com/tdewolff/parse/xml"
+
+import (
+	"io"
+	"strconv"
+
+	"github.com/tdewolff/parse"
+	"github.com/tdewolff/parse/buffer"
+)
+
+// TokenType determines the type of token, e.g. a comment or a start tag.
+type TokenType uint32
+
+// TokenType values.
+const (
+	ErrorToken TokenType = iota // extra token when errors occur
+	CommentToken
+	DOCTYPEToken
+	CDATAToken
+	StartTagToken
+	StartTagPIToken
+	StartTagCloseToken
+	StartTagCloseVoidToken
+	StartTagClosePIToken
+	EndTagToken
+	AttributeToken
+	TextToken
+)
+
+// String returns the string representation of a TokenType.
+func (tt TokenType) String() string {
+	switch tt {
+	case ErrorToken:
+		return "Error"
+	case CommentToken:
+		return "Comment"
+	case DOCTYPEToken:
+		return "DOCTYPE"
+	case CDATAToken:
+		return "CDATA"
+	case StartTagToken:
+		return "StartTag"
+	case StartTagPIToken:
+		return "StartTagPI"
+	case StartTagCloseToken:
+		return "StartTagClose"
+	case StartTagCloseVoidToken:
+		return "StartTagCloseVoid"
+	case StartTagClosePIToken:
+		return "StartTagClosePI"
+	case EndTagToken:
+		return "EndTag"
+	case AttributeToken:
+		return "Attribute"
+	case TextToken:
+		return "Text"
+	}
+	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
+}
+
+////////////////////////////////////////////////////////////////
+
+// Lexer is the state for the lexer.
+type Lexer struct {
+	r   *buffer.Lexer
+	err error
+
+	inTag bool
+
+	text    []byte
+	attrVal []byte
+}
+
+// NewLexer returns a new Lexer for a given io.Reader.
+func NewLexer(r io.Reader) *Lexer {
+	return &Lexer{
+		r: buffer.NewLexer(r),
+	}
+}
+
+// Err returns the error encountered during lexing; this is often io.EOF, but other errors can be returned as well.
+func (l *Lexer) Err() error {
+	err := l.r.Err()
+	if err != nil {
+		return err
+	}
+	return l.err
+}
+
+// Restore restores the NULL byte at the end of the buffer.
+func (l *Lexer) Restore() {
+	l.r.Restore()
+}
+
+// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
+func (l *Lexer) Next() (TokenType, []byte) {
+	l.text = nil
+	var c byte
+	if l.inTag {
+		l.attrVal = nil
+		for { // before attribute name state
+			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' {
+				l.r.Move(1)
+				continue
+			}
+			break
+		}
+		if c == 0 {
+			l.err = parse.NewErrorLexer("unexpected null character", l.r)
+			return ErrorToken, nil
+		} else if c != '>' && (c != '/' && c != '?' || l.r.Peek(1) != '>') {
+			return AttributeToken, l.shiftAttribute()
+		}
+		start := l.r.Pos()
+		l.inTag = false
+		if c == '/' {
+			l.r.Move(2)
+			l.text = l.r.Lexeme()[start:]
+			return StartTagCloseVoidToken, l.r.Shift()
+		} else if c == '?' {
+			l.r.Move(2)
+			l.text = l.r.Lexeme()[start:]
+			return StartTagClosePIToken, l.r.Shift()
+		} else {
+			l.r.Move(1)
+			l.text = l.r.Lexeme()[start:]
+			return StartTagCloseToken, l.r.Shift()
+		}
+	}
+
+	for {
+		c = l.r.Peek(0)
+		if c == '<' {
+			if l.r.Pos() > 0 {
+				return TextToken, l.r.Shift()
+			}
+			c = l.r.Peek(1)
+			if c == '/' {
+				l.r.Move(2)
+				return EndTagToken, l.shiftEndTag()
+			} else if c == '!' {
+				l.r.Move(2)
+				if l.at('-', '-') {
+					l.r.Move(2)
+					return CommentToken, l.shiftCommentText()
+				} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
+					l.r.Move(7)
+					return CDATAToken, l.shiftCDATAText()
+				} else if l.at('D', 'O', 'C', 'T', 'Y', 'P', 'E') {
+					l.r.Move(8)
+					return DOCTYPEToken, l.shiftDOCTYPEText()
+				}
+				l.r.Move(-2)
+			} else if c == '?'
{ + l.r.Move(2) + l.inTag = true + return StartTagPIToken, l.shiftStartTag() + } + l.r.Move(1) + l.inTag = true + return StartTagToken, l.shiftStartTag() + } else if c == 0 { + if l.r.Pos() > 0 { + return TextToken, l.r.Shift() + } + l.err = parse.NewErrorLexer("unexpected null character", l.r) + return ErrorToken, nil + } + l.r.Move(1) + } +} + +// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters. +func (l *Lexer) Text() []byte { + return l.text +} + +// AttrVal returns the attribute value when an AttributeToken was returned from Next. +func (l *Lexer) AttrVal() []byte { + return l.attrVal +} + +//////////////////////////////////////////////////////////////// + +// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html + +func (l *Lexer) shiftDOCTYPEText() []byte { + inString := false + inBrackets := false + for { + c := l.r.Peek(0) + if c == '"' { + inString = !inString + } else if (c == '[' || c == ']') && !inString { + inBrackets = (c == '[') + } else if c == '>' && !inString && !inBrackets { + l.text = l.r.Lexeme()[9:] + l.r.Move(1) + return l.r.Shift() + } else if c == 0 { + l.text = l.r.Lexeme()[9:] + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftCDATAText() []byte { + for { + c := l.r.Peek(0) + if c == ']' && l.r.Peek(1) == ']' && l.r.Peek(2) == '>' { + l.text = l.r.Lexeme()[9:] + l.r.Move(3) + return l.r.Shift() + } else if c == 0 { + l.text = l.r.Lexeme()[9:] + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftCommentText() []byte { + for { + c := l.r.Peek(0) + if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + l.text = l.r.Lexeme()[4:] + l.r.Move(3) + return l.r.Shift() + } else if c == 0 { + return l.r.Shift() + } + l.r.Move(1) + } +} + +func (l *Lexer) shiftStartTag() []byte { + nameStart := l.r.Pos() + for { + if c := l.r.Peek(0); c == ' ' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + l.text = l.r.Lexeme()[nameStart:] + return l.r.Shift() +} + +func (l *Lexer) shiftAttribute() []byte { + nameStart := l.r.Pos() + var c byte + for { // attribute name state + if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + nameEnd := l.r.Pos() + for { // after attribute name state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' { + l.r.Move(1) + continue + } + break + } + if c == '=' { + l.r.Move(1) + for { // before attribute value state + if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' { + l.r.Move(1) + continue + } + break + } + attrPos := l.r.Pos() + delim := c + if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state + l.r.Move(1) + for { + c = l.r.Peek(0) + if c == delim { + l.r.Move(1) + break + } else if c == 0 { + break + } + l.r.Move(1) + if c == '\t' || c == '\n' || c == '\r' { + l.r.Lexeme()[l.r.Pos()-1] = ' ' + } + } + } else { // attribute value unquoted state + for { + if c = l.r.Peek(0); c == ' ' || c == '>' || (c == '/' || c == '?') && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == 0 { + break + } + l.r.Move(1) + } + } + l.attrVal = l.r.Lexeme()[attrPos:] + } else { + l.r.Rewind(nameEnd) + l.attrVal = nil + } + l.text = l.r.Lexeme()[nameStart:nameEnd] + return l.r.Shift() +} + +func (l 
*Lexer) shiftEndTag() []byte { + for { + c := l.r.Peek(0) + if c == '>' { + l.text = l.r.Lexeme()[2:] + l.r.Move(1) + break + } else if c == 0 { + l.text = l.r.Lexeme()[2:] + break + } + l.r.Move(1) + } + + end := len(l.text) + for end > 0 { + if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' { + end-- + continue + } + break + } + l.text = l.text[:end] + return l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +func (l *Lexer) at(b ...byte) bool { + for i, c := range b { + if l.r.Peek(i) != c { + return false + } + } + return true +} diff --git a/vendor/github.com/tdewolff/parse/xml/lex_test.go b/vendor/github.com/tdewolff/parse/xml/lex_test.go new file mode 100644 index 0000000..f8cdd17 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/lex_test.go @@ -0,0 +1,193 @@ +package xml // import "github.com/tdewolff/parse/xml" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type TTs []TokenType + +func TestTokens(t *testing.T) { + var tokenTests = []struct { + xml string + expected []TokenType + }{ + {"", TTs{}}, + {"<!-- comment -->", TTs{CommentToken}}, + {"<!-- comment \n multi \r line -->", TTs{CommentToken}}, + {"<foo/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo \t\r\n/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo:bar.qux-norf/>", TTs{StartTagToken, StartTagCloseVoidToken}}, + {"<foo></foo>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, + {"<foo>text</foo>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, + {"<foo/> text", TTs{StartTagToken, StartTagCloseVoidToken, TextToken}}, + {"<a> <b> <c>text</c> </b> </a>", TTs{StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken, TextToken, EndTagToken}}, + {"<foo a='a' b=\"b\" c=c/>", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo a-b=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, + {"<?xml?>", TTs{StartTagPIToken, StartTagClosePIToken}}, + {"<?xml a=\"a\" ?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<?xml a=a?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, + {"<![CDATA[ test ]]>", TTs{CDATAToken}}, + {"<!DOCTYPE>", TTs{DOCTYPEToken}}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", TTs{DOCTYPEToken}}, + {`<!DOCTYPE note [<!ENTITY nbsp " "><!ENTITY writer "Writer: Donald Duck."><!ENTITY copyright "Copyright:]> W3Schools.">]>`, TTs{DOCTYPEToken}}, + {"<!foo>", TTs{StartTagToken, StartTagCloseToken}}, + + // early endings + {"<!-- comment", TTs{CommentToken}}, + {"<foo", TTs{StartTagToken}}, + {"</foo", TTs{EndTagToken}}, + {"<foo x", TTs{StartTagToken, AttributeToken}}, + {"<foo x=", TTs{StartTagToken, AttributeToken}}, + {"<foo x='", TTs{StartTagToken, AttributeToken}}, + {"<foo x=''", TTs{StartTagToken, AttributeToken}}, + {"<?xml", TTs{StartTagPIToken}}, + {"<![CDATA[ test", TTs{CDATAToken}}, + {"<!DOCTYPE note SYSTEM", TTs{DOCTYPEToken}}, + + // go fuzz + {"</", TTs{EndTagToken}}, + {"</\n", TTs{EndTagToken}}, + } + for _, tt := range tokenTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + i := 
0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, token, tt.expected[i], "token types must match") + } + i++ + } + }) + } + + test.T(t, TokenType(100).String(), "Invalid(100)") +} + +func TestTags(t *testing.T) { + var tagTests = []struct { + xml string + expected string + }{ + {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"}, + {"<?xml?>", "xml"}, + {"<foo?bar/qux>", "foo?bar/qux"}, + {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""}, + + // early endings + {"<foo ", "foo"}, + } + for _, tt := range tagTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.Fail(t, "when error occurred we must be at the end") + break + } else if token == StartTagToken || token == StartTagPIToken || token == EndTagToken || token == DOCTYPEToken { + test.String(t, string(l.Text()), tt.expected, "tags must match") + break + } + } + }) + } +} + +func TestAttributes(t *testing.T) { + var attributeTests = []struct { + attr string + expected []string + }{ + {"<foo a=\"b\" />", []string{"a", "\"b\""}}, + {"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}}, + {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a b \""}}, + {"<?xml a=b?>", []string{"a", "b"}}, + {"<foo /=? >", []string{"/", "?"}}, + + // early endings + {"<foo x", []string{"x", ""}}, + {"<foo x=", []string{"x", ""}}, + {"<foo x='", []string{"x", "'"}}, + } + for _, tt := range attributeTests { + t.Run(tt.attr, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.attr)) + i := 0 + for { + token, _ := l.Next() + if token == ErrorToken { + test.T(t, l.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if token == AttributeToken { + test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected)) + if i+1 < len(tt.expected) { + test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match") + test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match") + i += 2 + } + } + } + }) + } +} + +func TestErrors(t *testing.T) { + var errorTests = []struct { + xml string + col int + }{ + {"a\x00b", 2}, + } + for _, tt := range errorTests { + t.Run(tt.xml, func(t *testing.T) { + l := NewLexer(bytes.NewBufferString(tt.xml)) + for { + token, _ := l.Next() + if token == ErrorToken { + if tt.col == 0 { + test.T(t, l.Err(), io.EOF) + } else if perr, ok := l.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", l.Err()) + } + break + } + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewLexer() { + l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>")) + out := "" + for { + tt, data := l.Next() + if tt == ErrorToken { + break + } + out += string(data) + } + fmt.Println(out) + // Output: <span class='user'>John Doe</span> +} diff --git a/vendor/github.com/tdewolff/parse/xml/util.go b/vendor/github.com/tdewolff/parse/xml/util.go new file mode 100644 index 0000000..1501b9b --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/util.go @@ -0,0 +1,108 @@ 
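+// The helpers in this file escape attribute values and CDATA text, reusing the
+// caller-provided buffer whenever it has enough capacity.
+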
+package xml // import "github.com/tdewolff/parse/xml"
+
+import "github.com/tdewolff/parse"
+
+var (
+	ltEntityBytes          = []byte("&lt;")
+	ampEntityBytes         = []byte("&amp;")
+	singleQuoteEntityBytes = []byte("&#39;")
+	doubleQuoteEntityBytes = []byte("&#34;")
+)
+
+// EscapeAttrVal returns the escaped attribute value bytes, including the surrounding quotes; the input b is expected without quotes.
+func EscapeAttrVal(buf *[]byte, b []byte) []byte {
+	singles := 0
+	doubles := 0
+	for i, c := range b {
+		if c == '&' {
+			if quote, n := parse.QuoteEntity(b[i:]); n > 0 {
+				if quote == '"' {
+					doubles++
+				} else {
+					singles++
+				}
+			}
+		} else if c == '"' {
+			doubles++
+		} else if c == '\'' {
+			singles++
+		}
+	}
+
+	n := len(b) + 2
+	var quote byte
+	var escapedQuote []byte
+	if doubles > singles {
+		n += singles * 4
+		quote = '\''
+		escapedQuote = singleQuoteEntityBytes
+	} else {
+		n += doubles * 4
+		quote = '"'
+		escapedQuote = doubleQuoteEntityBytes
+	}
+	if n > cap(*buf) {
+		*buf = make([]byte, 0, n) // maximum size, not actual size
+	}
+	t := (*buf)[:n] // maximum size, not actual size
+	t[0] = quote
+	j := 1
+	start := 0
+	for i, c := range b {
+		if c == '&' {
+			if entityQuote, n := parse.QuoteEntity(b[i:]); n > 0 {
+				j += copy(t[j:], b[start:i])
+				if entityQuote != quote {
+					t[j] = entityQuote
+					j++
+				} else {
+					j += copy(t[j:], escapedQuote)
+				}
+				start = i + n
+			}
+		} else if c == quote {
+			j += copy(t[j:], b[start:i])
+			j += copy(t[j:], escapedQuote)
+			start = i + 1
+		}
+	}
+	j += copy(t[j:], b[start:])
+	t[j] = quote
+	return t[:j+1]
+}
+
+// EscapeCDATAVal returns the escaped text bytes and true when escaping is cheaper than keeping the CDATA section; otherwise it returns the original bytes and false.
+func EscapeCDATAVal(buf *[]byte, b []byte) ([]byte, bool) {
+	n := 0
+	for _, c := range b {
+		if c == '<' || c == '&' {
+			if c == '<' {
+				n += 3 // &lt;
+			} else {
+				n += 4 // &amp;
+			}
+			if n > len("<![CDATA[]]>") {
+				return b, false
+			}
+		}
+	}
+	if len(b)+n > cap(*buf) {
+		*buf = make([]byte, 0, len(b)+n)
+	}
+	t := (*buf)[:len(b)+n]
+	j := 0
+	start := 0
+	for i, c := range b {
+		if c == '<' {
+			j += copy(t[j:], b[start:i])
+			j += copy(t[j:], ltEntityBytes)
+			start = i + 1
+		} else if c == '&' {
+			j += copy(t[j:], b[start:i])
+			j += copy(t[j:], ampEntityBytes)
+			start = i + 1
+		}
+	}
+	j += copy(t[j:], b[start:])
+	return t[:j], true
+}
diff --git a/vendor/github.com/tdewolff/parse/xml/util_test.go b/vendor/github.com/tdewolff/parse/xml/util_test.go new file mode 100644 index 0000000..65be6b8 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/xml/util_test.go @@ -0,0 +1,63 @@ +package xml // import "github.com/tdewolff/parse/xml"
+
+import (
+	"testing"
+
+	"github.com/tdewolff/test"
+)
+
+func TestEscapeAttrVal(t *testing.T) {
+	var attrValTests = []struct {
+		attrVal  string
+		expected string
+	}{
+		{"xyz", "\"xyz\""},
+		{"", "\"\""},
+		{"x&amp;z", "\"x&amp;z\""},
+		{"x'z", "\"x'z\""},
+		{"x\"z", "'x\"z'"},
+		{"a'b=\"\"", "'a&#39;b=\"\"'"},
+		{"'x'\"'z'", "\"x'&#34;'z\""},
+		{"\"x&quot;'&quot;z\"", "'x\"&#39;\"z'"},
+		{"a'b=\"\"", "'a&#39;b=\"\"'"},
+	}
+	var buf []byte
+	for _, tt := range attrValTests {
+		t.Run(tt.attrVal, func(t *testing.T) {
+			b := []byte(tt.attrVal)
+			if len(b) > 1 && (b[0] == '"' || b[0] == '\'') && b[0] == b[len(b)-1] {
+				b = b[1 : len(b)-1]
+			}
+			val := EscapeAttrVal(&buf, []byte(b))
+			test.String(t, string(val), tt.expected)
+		})
+	}
+}
+
+func TestEscapeCDATAVal(t *testing.T) {
+	var CDATAValTests = []struct {
+		CDATAVal string
+		expected string
+	}{
+		{"<![CDATA[<b>]]>", "&lt;b>"},
+		{"<![CDATA[abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz]]>", "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"},
+		{"<![CDATA[ <b> ]]>", " 
<b> "}, + {"<![CDATA[<<<<<]]>", "<![CDATA[<<<<<]]>"}, + {"<![CDATA[&]]>", "&"}, + {"<![CDATA[&&&&]]>", "<![CDATA[&&&&]]>"}, + {"<![CDATA[ a ]]>", " a "}, + {"<![CDATA[]]>", ""}, + } + var buf []byte + for _, tt := range CDATAValTests { + t.Run(tt.CDATAVal, func(t *testing.T) { + b := []byte(tt.CDATAVal[len("<![CDATA[") : len(tt.CDATAVal)-len("]]>")]) + data, useText := EscapeCDATAVal(&buf, b) + text := string(data) + if !useText { + text = "<![CDATA[" + text + "]]>" + } + test.String(t, text, tt.expected) + }) + } +} |