diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/js')
-rw-r--r-- | vendor/github.com/tdewolff/parse/js/README.md | 89 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/js/hash.go | 156 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/js/hash_test.go | 18 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/js/lex.go | 669 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/js/lex_test.go | 155 |
5 files changed, 0 insertions, 1087 deletions
diff --git a/vendor/github.com/tdewolff/parse/js/README.md b/vendor/github.com/tdewolff/parse/js/README.md deleted file mode 100644 index 7b06d7c..0000000 --- a/vendor/github.com/tdewolff/parse/js/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# JS [![GoDoc](http://godoc.org/github.com/tdewolff/parse/js?status.svg)](http://godoc.org/github.com/tdewolff/parse/js) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/js)](http://gocover.io/github.com/tdewolff/parse/js) - -This package is a JS lexer (ECMA-262, edition 6.0) written in [Go][1]. It follows the specification at [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/). The lexer takes an io.Reader and converts it into tokens until the EOF. - -## Installation -Run the following command - - go get github.com/tdewolff/parse/js - -or add the following import and run project with `go get` - - import "github.com/tdewolff/parse/js" - -## Lexer -### Usage -The following initializes a new Lexer with io.Reader `r`: -``` go -l := js.NewLexer(r) -``` - -To tokenize until EOF an error, use: -``` go -for { - tt, text := l.Next() - switch tt { - case js.ErrorToken: - // error or EOF set in l.Err() - return - // ... - } -} -``` - -All tokens (see [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/)): -``` go -ErrorToken TokenType = iota // extra token when errors occur -UnknownToken // extra token when no token can be matched -WhitespaceToken // space \t \v \f -LineTerminatorToken // \r \n \r\n -CommentToken -IdentifierToken // also: null true false -PunctuatorToken /* { } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >> - >>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^= / /= => */ -NumericToken -StringToken -RegexpToken -TemplateToken -``` - -### Quirks -Because the ECMAScript specification for `PunctuatorToken` (of which the `/` and `/=` symbols) and `RegexpToken` depends on a parser state to differentiate between the two, the lexer (to remain modular) uses different rules. It aims to correctly disambiguate contexts and returns `RegexpToken` or `PunctuatorToken` where appropriate with only few exceptions which don't make much sense in runtime and so don't happen in a real-world code: function literal division (`x = function y(){} / z`) and object literal division (`x = {y:1} / z`). - -Another interesting case introduced by ES2015 is `yield` operator in function generators vs `yield` as an identifier in regular functions. This was done for backward compatibility, but is very hard to disambiguate correctly on a lexer level without essentially implementing entire parsing spec as a state machine and hurting performance, code readability and maintainability, so, instead, `yield` is just always assumed to be an operator. In combination with above paragraph, this means that, for example, `yield /x/i` will be always parsed as `yield`-ing regular expression and not as `yield` identifier divided by `x` and then `i`. There is no evidence though that this pattern occurs in any popular libraries. - -### Examples -``` go -package main - -import ( - "os" - - "github.com/tdewolff/parse/js" -) - -// Tokenize JS from stdin. -func main() { - l := js.NewLexer(os.Stdin) - for { - tt, text := l.Next() - switch tt { - case js.ErrorToken: - if l.Err() != io.EOF { - fmt.Println("Error on line", l.Line(), ":", l.Err()) - } - return - case js.IdentifierToken: - fmt.Println("Identifier", string(text)) - case js.NumericToken: - fmt.Println("Numeric", string(text)) - // ... - } - } -} -``` - -## License -Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). - -[1]: http://golang.org/ "Go Language" diff --git a/vendor/github.com/tdewolff/parse/js/hash.go b/vendor/github.com/tdewolff/parse/js/hash.go deleted file mode 100644 index 3de86b2..0000000 --- a/vendor/github.com/tdewolff/parse/js/hash.go +++ /dev/null @@ -1,156 +0,0 @@ -package js - -// generated by hasher -file hash.go -type Hash; DO NOT EDIT, except for adding more constants to the list and rerun go generate - -// uses github.com/tdewolff/hasher -//go:generate hasher -type=Hash -file=hash.go - -// Hash defines perfect hashes for a predefined list of strings -type Hash uint32 - -// Unique hash definitions to be used instead of strings -const ( - Break Hash = 0x5 // break - Case Hash = 0x3404 // case - Catch Hash = 0xba05 // catch - Class Hash = 0x505 // class - Const Hash = 0x2c05 // const - Continue Hash = 0x3e08 // continue - Debugger Hash = 0x8408 // debugger - Default Hash = 0xab07 // default - Delete Hash = 0xcd06 // delete - Do Hash = 0x4c02 // do - Else Hash = 0x3704 // else - Enum Hash = 0x3a04 // enum - Export Hash = 0x1806 // export - Extends Hash = 0x4507 // extends - False Hash = 0x5a05 // false - Finally Hash = 0x7a07 // finally - For Hash = 0xc403 // for - Function Hash = 0x4e08 // function - If Hash = 0x5902 // if - Implements Hash = 0x5f0a // implements - Import Hash = 0x6906 // import - In Hash = 0x4202 // in - Instanceof Hash = 0x710a // instanceof - Interface Hash = 0x8c09 // interface - Let Hash = 0xcf03 // let - New Hash = 0x1203 // new - Null Hash = 0x5504 // null - Package Hash = 0x9507 // package - Private Hash = 0x9c07 // private - Protected Hash = 0xa309 // protected - Public Hash = 0xb506 // public - Return Hash = 0xd06 // return - Static Hash = 0x2f06 // static - Super Hash = 0x905 // super - Switch Hash = 0x2606 // switch - This Hash = 0x2304 // this - Throw Hash = 0x1d05 // throw - True Hash = 0xb104 // true - Try Hash = 0x6e03 // try - Typeof Hash = 0xbf06 // typeof - Var Hash = 0xc703 // var - Void Hash = 0xca04 // void - While Hash = 0x1405 // while - With Hash = 0x2104 // with - Yield Hash = 0x8005 // yield -) - -// String returns the hash' name. -func (i Hash) String() string { - start := uint32(i >> 8) - n := uint32(i & 0xff) - if start+n > uint32(len(_Hash_text)) { - return "" - } - return _Hash_text[start : start+n] -} - -// ToHash returns the hash whose name is s. It returns zero if there is no -// such hash. It is case sensitive. -func ToHash(s []byte) Hash { - if len(s) == 0 || len(s) > _Hash_maxLen { - return 0 - } - h := uint32(_Hash_hash0) - for i := 0; i < len(s); i++ { - h ^= uint32(s[i]) - h *= 16777619 - } - if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { - t := _Hash_text[i>>8 : i>>8+i&0xff] - for i := 0; i < len(s); i++ { - if t[i] != s[i] { - goto NEXT - } - } - return i - } -NEXT: - if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { - t := _Hash_text[i>>8 : i>>8+i&0xff] - for i := 0; i < len(s); i++ { - if t[i] != s[i] { - return 0 - } - } - return i - } - return 0 -} - -const _Hash_hash0 = 0x9acb0442 -const _Hash_maxLen = 10 -const _Hash_text = "breakclassupereturnewhilexporthrowithiswitchconstaticaselsen" + - "umcontinuextendsdofunctionullifalseimplementsimportryinstanc" + - "eofinallyieldebuggerinterfacepackageprivateprotectedefaultru" + - "epublicatchtypeoforvarvoidelete" - -var _Hash_table = [1 << 6]Hash{ - 0x0: 0x2f06, // static - 0x1: 0x9c07, // private - 0x3: 0xb104, // true - 0x6: 0x5a05, // false - 0x7: 0x4c02, // do - 0x9: 0x2c05, // const - 0xa: 0x2606, // switch - 0xb: 0x6e03, // try - 0xc: 0x1203, // new - 0xd: 0x4202, // in - 0xf: 0x8005, // yield - 0x10: 0x5f0a, // implements - 0x11: 0xc403, // for - 0x12: 0x505, // class - 0x13: 0x3a04, // enum - 0x16: 0xc703, // var - 0x17: 0x5902, // if - 0x19: 0xcf03, // let - 0x1a: 0x9507, // package - 0x1b: 0xca04, // void - 0x1c: 0xcd06, // delete - 0x1f: 0x5504, // null - 0x20: 0x1806, // export - 0x21: 0xd06, // return - 0x23: 0x4507, // extends - 0x25: 0x2304, // this - 0x26: 0x905, // super - 0x27: 0x1405, // while - 0x29: 0x5, // break - 0x2b: 0x3e08, // continue - 0x2e: 0x3404, // case - 0x2f: 0xab07, // default - 0x31: 0x8408, // debugger - 0x32: 0x1d05, // throw - 0x33: 0xbf06, // typeof - 0x34: 0x2104, // with - 0x35: 0xba05, // catch - 0x36: 0x4e08, // function - 0x37: 0x710a, // instanceof - 0x38: 0xa309, // protected - 0x39: 0x8c09, // interface - 0x3b: 0xb506, // public - 0x3c: 0x3704, // else - 0x3d: 0x7a07, // finally - 0x3f: 0x6906, // import -} diff --git a/vendor/github.com/tdewolff/parse/js/hash_test.go b/vendor/github.com/tdewolff/parse/js/hash_test.go deleted file mode 100644 index fa6a213..0000000 --- a/vendor/github.com/tdewolff/parse/js/hash_test.go +++ /dev/null @@ -1,18 +0,0 @@ -package js // import "github.com/tdewolff/parse/js" - -import ( - "testing" - - "github.com/tdewolff/test" -) - -func TestHashTable(t *testing.T) { - test.T(t, ToHash([]byte("break")), Break, "'break' must resolve to hash.Break") - test.T(t, ToHash([]byte("var")), Var, "'var' must resolve to hash.Var") - test.T(t, Break.String(), "break") - test.T(t, ToHash([]byte("")), Hash(0), "empty string must resolve to zero") - test.T(t, Hash(0xffffff).String(), "") - test.T(t, ToHash([]byte("breaks")), Hash(0), "'breaks' must resolve to zero") - test.T(t, ToHash([]byte("sdf")), Hash(0), "'sdf' must resolve to zero") - test.T(t, ToHash([]byte("uio")), Hash(0), "'uio' must resolve to zero") -} diff --git a/vendor/github.com/tdewolff/parse/js/lex.go b/vendor/github.com/tdewolff/parse/js/lex.go deleted file mode 100644 index ce4e1d5..0000000 --- a/vendor/github.com/tdewolff/parse/js/lex.go +++ /dev/null @@ -1,669 +0,0 @@ -// Package js is an ECMAScript5.1 lexer following the specifications at http://www.ecma-international.org/ecma-262/5.1/. -package js // import "github.com/tdewolff/parse/js" - -import ( - "io" - "strconv" - "unicode" - - "github.com/tdewolff/parse/buffer" -) - -var identifierStart = []*unicode.RangeTable{unicode.Lu, unicode.Ll, unicode.Lt, unicode.Lm, unicode.Lo, unicode.Nl, unicode.Other_ID_Start} -var identifierContinue = []*unicode.RangeTable{unicode.Lu, unicode.Ll, unicode.Lt, unicode.Lm, unicode.Lo, unicode.Nl, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue} - -//////////////////////////////////////////////////////////////// - -// TokenType determines the type of token, eg. a number or a semicolon. -type TokenType uint32 - -// TokenType values. -const ( - ErrorToken TokenType = iota // extra token when errors occur - UnknownToken // extra token when no token can be matched - WhitespaceToken // space \t \v \f - LineTerminatorToken // \r \n \r\n - SingleLineCommentToken - MultiLineCommentToken // token for comments with line terminators (not just any /*block*/) - IdentifierToken - PunctuatorToken /* { } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >> - >>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^= / /= >= */ - NumericToken - StringToken - RegexpToken - TemplateToken -) - -// TokenState determines a state in which next token should be read -type TokenState uint32 - -// TokenState values -const ( - ExprState TokenState = iota - StmtParensState - SubscriptState - PropNameState -) - -// ParsingContext determines the context in which following token should be parsed. -// This affects parsing regular expressions and template literals. -type ParsingContext uint32 - -// ParsingContext values -const ( - GlobalContext ParsingContext = iota - StmtParensContext - ExprParensContext - BracesContext - TemplateContext -) - -// String returns the string representation of a TokenType. -func (tt TokenType) String() string { - switch tt { - case ErrorToken: - return "Error" - case UnknownToken: - return "Unknown" - case WhitespaceToken: - return "Whitespace" - case LineTerminatorToken: - return "LineTerminator" - case SingleLineCommentToken: - return "SingleLineComment" - case MultiLineCommentToken: - return "MultiLineComment" - case IdentifierToken: - return "Identifier" - case PunctuatorToken: - return "Punctuator" - case NumericToken: - return "Numeric" - case StringToken: - return "String" - case RegexpToken: - return "Regexp" - case TemplateToken: - return "Template" - } - return "Invalid(" + strconv.Itoa(int(tt)) + ")" -} - -//////////////////////////////////////////////////////////////// - -// Lexer is the state for the lexer. -type Lexer struct { - r *buffer.Lexer - stack []ParsingContext - state TokenState - emptyLine bool -} - -// NewLexer returns a new Lexer for a given io.Reader. -func NewLexer(r io.Reader) *Lexer { - return &Lexer{ - r: buffer.NewLexer(r), - stack: make([]ParsingContext, 0, 16), - state: ExprState, - emptyLine: true, - } -} - -func (l *Lexer) enterContext(context ParsingContext) { - l.stack = append(l.stack, context) -} - -func (l *Lexer) leaveContext() ParsingContext { - ctx := GlobalContext - if last := len(l.stack) - 1; last >= 0 { - ctx, l.stack = l.stack[last], l.stack[:last] - } - return ctx -} - -// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned. -func (l *Lexer) Err() error { - return l.r.Err() -} - -// Restore restores the NULL byte at the end of the buffer. -func (l *Lexer) Restore() { - l.r.Restore() -} - -// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. -func (l *Lexer) Next() (TokenType, []byte) { - tt := UnknownToken - c := l.r.Peek(0) - switch c { - case '(': - if l.state == StmtParensState { - l.enterContext(StmtParensContext) - } else { - l.enterContext(ExprParensContext) - } - l.state = ExprState - l.r.Move(1) - tt = PunctuatorToken - case ')': - if l.leaveContext() == StmtParensContext { - l.state = ExprState - } else { - l.state = SubscriptState - } - l.r.Move(1) - tt = PunctuatorToken - case '{': - l.enterContext(BracesContext) - l.state = ExprState - l.r.Move(1) - tt = PunctuatorToken - case '}': - if l.leaveContext() == TemplateContext && l.consumeTemplateToken() { - tt = TemplateToken - } else { - // will work incorrectly for objects or functions divided by something, - // but that's an extremely rare case - l.state = ExprState - l.r.Move(1) - tt = PunctuatorToken - } - case ']': - l.state = SubscriptState - l.r.Move(1) - tt = PunctuatorToken - case '[', ';', ',', '~', '?', ':': - l.state = ExprState - l.r.Move(1) - tt = PunctuatorToken - case '<', '>', '=', '!', '+', '-', '*', '%', '&', '|', '^': - if l.consumeHTMLLikeCommentToken() { - return SingleLineCommentToken, l.r.Shift() - } else if l.consumeLongPunctuatorToken() { - l.state = ExprState - tt = PunctuatorToken - } - case '/': - if tt = l.consumeCommentToken(); tt != UnknownToken { - return tt, l.r.Shift() - } else if l.state == ExprState && l.consumeRegexpToken() { - l.state = SubscriptState - tt = RegexpToken - } else if l.consumeLongPunctuatorToken() { - l.state = ExprState - tt = PunctuatorToken - } - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.': - if l.consumeNumericToken() { - tt = NumericToken - l.state = SubscriptState - } else if c == '.' { - l.state = PropNameState - l.r.Move(1) - tt = PunctuatorToken - } - case '\'', '"': - if l.consumeStringToken() { - l.state = SubscriptState - tt = StringToken - } - case ' ', '\t', '\v', '\f': - l.r.Move(1) - for l.consumeWhitespace() { - } - return WhitespaceToken, l.r.Shift() - case '\n', '\r': - l.r.Move(1) - for l.consumeLineTerminator() { - } - tt = LineTerminatorToken - case '`': - if l.consumeTemplateToken() { - tt = TemplateToken - } - default: - if l.consumeIdentifierToken() { - tt = IdentifierToken - if l.state != PropNameState { - switch hash := ToHash(l.r.Lexeme()); hash { - case 0, This, False, True, Null: - l.state = SubscriptState - case If, While, For, With: - l.state = StmtParensState - default: - // This will include keywords that can't be followed by a regexp, but only - // by a specified char (like `switch` or `try`), but we don't check for syntax - // errors as we don't attempt to parse a full JS grammar when streaming - l.state = ExprState - } - } else { - l.state = SubscriptState - } - } else if c >= 0xC0 { - if l.consumeWhitespace() { - for l.consumeWhitespace() { - } - return WhitespaceToken, l.r.Shift() - } else if l.consumeLineTerminator() { - for l.consumeLineTerminator() { - } - tt = LineTerminatorToken - } - } else if l.Err() != nil { - return ErrorToken, nil - } - } - - l.emptyLine = tt == LineTerminatorToken - - if tt == UnknownToken { - _, n := l.r.PeekRune(0) - l.r.Move(n) - } - return tt, l.r.Shift() -} - -//////////////////////////////////////////////////////////////// - -/* -The following functions follow the specifications at http://www.ecma-international.org/ecma-262/5.1/ -*/ - -func (l *Lexer) consumeWhitespace() bool { - c := l.r.Peek(0) - if c == ' ' || c == '\t' || c == '\v' || c == '\f' { - l.r.Move(1) - return true - } else if c >= 0xC0 { - if r, n := l.r.PeekRune(0); r == '\u00A0' || r == '\uFEFF' || unicode.Is(unicode.Zs, r) { - l.r.Move(n) - return true - } - } - return false -} - -func (l *Lexer) consumeLineTerminator() bool { - c := l.r.Peek(0) - if c == '\n' { - l.r.Move(1) - return true - } else if c == '\r' { - if l.r.Peek(1) == '\n' { - l.r.Move(2) - } else { - l.r.Move(1) - } - return true - } else if c >= 0xC0 { - if r, n := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { - l.r.Move(n) - return true - } - } - return false -} - -func (l *Lexer) consumeDigit() bool { - if c := l.r.Peek(0); c >= '0' && c <= '9' { - l.r.Move(1) - return true - } - return false -} - -func (l *Lexer) consumeHexDigit() bool { - if c := l.r.Peek(0); (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') { - l.r.Move(1) - return true - } - return false -} - -func (l *Lexer) consumeBinaryDigit() bool { - if c := l.r.Peek(0); c == '0' || c == '1' { - l.r.Move(1) - return true - } - return false -} - -func (l *Lexer) consumeOctalDigit() bool { - if c := l.r.Peek(0); c >= '0' && c <= '7' { - l.r.Move(1) - return true - } - return false -} - -func (l *Lexer) consumeUnicodeEscape() bool { - if l.r.Peek(0) != '\\' || l.r.Peek(1) != 'u' { - return false - } - mark := l.r.Pos() - l.r.Move(2) - if c := l.r.Peek(0); c == '{' { - l.r.Move(1) - if l.consumeHexDigit() { - for l.consumeHexDigit() { - } - if c := l.r.Peek(0); c == '}' { - l.r.Move(1) - return true - } - } - l.r.Rewind(mark) - return false - } else if !l.consumeHexDigit() || !l.consumeHexDigit() || !l.consumeHexDigit() || !l.consumeHexDigit() { - l.r.Rewind(mark) - return false - } - return true -} - -func (l *Lexer) consumeSingleLineComment() { - for { - c := l.r.Peek(0) - if c == '\r' || c == '\n' || c == 0 { - break - } else if c >= 0xC0 { - if r, _ := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { - break - } - } - l.r.Move(1) - } -} - -//////////////////////////////////////////////////////////////// - -func (l *Lexer) consumeHTMLLikeCommentToken() bool { - c := l.r.Peek(0) - if c == '<' && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { - // opening HTML-style single line comment - l.r.Move(4) - l.consumeSingleLineComment() - return true - } else if l.emptyLine && c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { - // closing HTML-style single line comment - // (only if current line didn't contain any meaningful tokens) - l.r.Move(3) - l.consumeSingleLineComment() - return true - } - return false -} - -func (l *Lexer) consumeCommentToken() TokenType { - c := l.r.Peek(0) - if c == '/' { - c = l.r.Peek(1) - if c == '/' { - // single line comment - l.r.Move(2) - l.consumeSingleLineComment() - return SingleLineCommentToken - } else if c == '*' { - // block comment (potentially multiline) - tt := SingleLineCommentToken - l.r.Move(2) - for { - c := l.r.Peek(0) - if c == '*' && l.r.Peek(1) == '/' { - l.r.Move(2) - break - } else if c == 0 { - break - } else if l.consumeLineTerminator() { - tt = MultiLineCommentToken - l.emptyLine = true - } else { - l.r.Move(1) - } - } - return tt - } - } - return UnknownToken -} - -func (l *Lexer) consumeLongPunctuatorToken() bool { - c := l.r.Peek(0) - if c == '!' || c == '=' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^' { - l.r.Move(1) - if l.r.Peek(0) == '=' { - l.r.Move(1) - if (c == '!' || c == '=') && l.r.Peek(0) == '=' { - l.r.Move(1) - } - } else if (c == '+' || c == '-' || c == '&' || c == '|') && l.r.Peek(0) == c { - l.r.Move(1) - } else if c == '=' && l.r.Peek(0) == '>' { - l.r.Move(1) - } - } else { // c == '<' || c == '>' - l.r.Move(1) - if l.r.Peek(0) == c { - l.r.Move(1) - if c == '>' && l.r.Peek(0) == '>' { - l.r.Move(1) - } - } - if l.r.Peek(0) == '=' { - l.r.Move(1) - } - } - return true -} - -func (l *Lexer) consumeIdentifierToken() bool { - c := l.r.Peek(0) - if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '$' || c == '_' { - l.r.Move(1) - } else if c >= 0xC0 { - if r, n := l.r.PeekRune(0); unicode.IsOneOf(identifierStart, r) { - l.r.Move(n) - } else { - return false - } - } else if !l.consumeUnicodeEscape() { - return false - } - for { - c := l.r.Peek(0) - if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { - l.r.Move(1) - } else if c >= 0xC0 { - if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { - l.r.Move(n) - } else { - break - } - } else { - break - } - } - return true -} - -func (l *Lexer) consumeNumericToken() bool { - // assume to be on 0 1 2 3 4 5 6 7 8 9 . - mark := l.r.Pos() - c := l.r.Peek(0) - if c == '0' { - l.r.Move(1) - if l.r.Peek(0) == 'x' || l.r.Peek(0) == 'X' { - l.r.Move(1) - if l.consumeHexDigit() { - for l.consumeHexDigit() { - } - } else { - l.r.Move(-1) // return just the zero - } - return true - } else if l.r.Peek(0) == 'b' || l.r.Peek(0) == 'B' { - l.r.Move(1) - if l.consumeBinaryDigit() { - for l.consumeBinaryDigit() { - } - } else { - l.r.Move(-1) // return just the zero - } - return true - } else if l.r.Peek(0) == 'o' || l.r.Peek(0) == 'O' { - l.r.Move(1) - if l.consumeOctalDigit() { - for l.consumeOctalDigit() { - } - } else { - l.r.Move(-1) // return just the zero - } - return true - } - } else if c != '.' { - for l.consumeDigit() { - } - } - if l.r.Peek(0) == '.' { - l.r.Move(1) - if l.consumeDigit() { - for l.consumeDigit() { - } - } else if c != '.' { - // . could belong to the next token - l.r.Move(-1) - return true - } else { - l.r.Rewind(mark) - return false - } - } - mark = l.r.Pos() - c = l.r.Peek(0) - if c == 'e' || c == 'E' { - l.r.Move(1) - c = l.r.Peek(0) - if c == '+' || c == '-' { - l.r.Move(1) - } - if !l.consumeDigit() { - // e could belong to the next token - l.r.Rewind(mark) - return true - } - for l.consumeDigit() { - } - } - return true -} - -func (l *Lexer) consumeStringToken() bool { - // assume to be on ' or " - mark := l.r.Pos() - delim := l.r.Peek(0) - l.r.Move(1) - for { - c := l.r.Peek(0) - if c == delim { - l.r.Move(1) - break - } else if c == '\\' { - l.r.Move(1) - if !l.consumeLineTerminator() { - if c := l.r.Peek(0); c == delim || c == '\\' { - l.r.Move(1) - } - } - continue - } else if c == '\n' || c == '\r' { - l.r.Rewind(mark) - return false - } else if c >= 0xC0 { - if r, _ := l.r.PeekRune(0); r == '\u2028' || r == '\u2029' { - l.r.Rewind(mark) - return false - } - } else if c == 0 { - break - } - l.r.Move(1) - } - return true -} - -func (l *Lexer) consumeRegexpToken() bool { - // assume to be on / and not /* - mark := l.r.Pos() - l.r.Move(1) - inClass := false - for { - c := l.r.Peek(0) - if !inClass && c == '/' { - l.r.Move(1) - break - } else if c == '[' { - inClass = true - } else if c == ']' { - inClass = false - } else if c == '\\' { - l.r.Move(1) - if l.consumeLineTerminator() { - l.r.Rewind(mark) - return false - } else if l.r.Peek(0) == 0 { - return true - } - } else if l.consumeLineTerminator() { - l.r.Rewind(mark) - return false - } else if c == 0 { - return true - } - l.r.Move(1) - } - // flags - for { - c := l.r.Peek(0) - if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { - l.r.Move(1) - } else if c >= 0xC0 { - if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { - l.r.Move(n) - } else { - break - } - } else { - break - } - } - return true -} - -func (l *Lexer) consumeTemplateToken() bool { - // assume to be on ` or } when already within template - mark := l.r.Pos() - l.r.Move(1) - for { - c := l.r.Peek(0) - if c == '`' { - l.state = SubscriptState - l.r.Move(1) - return true - } else if c == '$' && l.r.Peek(1) == '{' { - l.enterContext(TemplateContext) - l.state = ExprState - l.r.Move(2) - return true - } else if c == '\\' { - l.r.Move(1) - if c := l.r.Peek(0); c != 0 { - l.r.Move(1) - } - continue - } else if c == 0 { - l.r.Rewind(mark) - return false - } - l.r.Move(1) - } -} diff --git a/vendor/github.com/tdewolff/parse/js/lex_test.go b/vendor/github.com/tdewolff/parse/js/lex_test.go deleted file mode 100644 index 1866087..0000000 --- a/vendor/github.com/tdewolff/parse/js/lex_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package js // import "github.com/tdewolff/parse/js" - -import ( - "bytes" - "fmt" - "io" - "testing" - - "github.com/tdewolff/test" -) - -type TTs []TokenType - -func TestTokens(t *testing.T) { - var tokenTests = []struct { - js string - expected []TokenType - }{ - {" \t\v\f\u00A0\uFEFF\u2000", TTs{}}, // WhitespaceToken - {"\n\r\r\n\u2028\u2029", TTs{LineTerminatorToken}}, - {"5.2 .04 0x0F 5e99", TTs{NumericToken, NumericToken, NumericToken, NumericToken}}, - {"a = 'string'", TTs{IdentifierToken, PunctuatorToken, StringToken}}, - {"/*comment*/ //comment", TTs{SingleLineCommentToken, SingleLineCommentToken}}, - {"{ } ( ) [ ]", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {". ; , < > <=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {">= == != === !==", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"+ - * % ++ --", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"<< >> >>> & | ^", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"! ~ && || ? :", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"= += -= *= %= <<=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {">>= >>>= &= |= ^= =>", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"a = /.*/g;", TTs{IdentifierToken, PunctuatorToken, RegexpToken, PunctuatorToken}}, - - {"/*co\nm\u2028m/*ent*/ //co//mment\u2029//comment", TTs{MultiLineCommentToken, SingleLineCommentToken, LineTerminatorToken, SingleLineCommentToken}}, - {"<!-", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"1<!--2\n", TTs{NumericToken, SingleLineCommentToken, LineTerminatorToken}}, - {"x=y-->10\n", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken, LineTerminatorToken}}, - {" /*comment*/ -->nothing\n", TTs{SingleLineCommentToken, SingleLineCommentToken, LineTerminatorToken}}, - {"1 /*comment\nmultiline*/ -->nothing\n", TTs{NumericToken, MultiLineCommentToken, SingleLineCommentToken, LineTerminatorToken}}, - {"$ _\u200C \\u2000 \u200C", TTs{IdentifierToken, IdentifierToken, IdentifierToken, UnknownToken}}, - {">>>=>>>>=", TTs{PunctuatorToken, PunctuatorToken, PunctuatorToken}}, - {"1/", TTs{NumericToken, PunctuatorToken}}, - {"1/=", TTs{NumericToken, PunctuatorToken}}, - {"010xF", TTs{NumericToken, NumericToken, IdentifierToken}}, - {"50e+-0", TTs{NumericToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken}}, - {"'str\\i\\'ng'", TTs{StringToken}}, - {"'str\\\\'abc", TTs{StringToken, IdentifierToken}}, - {"'str\\\ni\\\\u00A0ng'", TTs{StringToken}}, - {"a = /[a-z/]/g", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, - {"a=/=/g1", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, - {"a = /'\\\\/\n", TTs{IdentifierToken, PunctuatorToken, RegexpToken, LineTerminatorToken}}, - {"a=/\\//g1", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, - {"new RegExp(a + /\\d{1,2}/.source)", TTs{IdentifierToken, IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, RegexpToken, PunctuatorToken, IdentifierToken, PunctuatorToken}}, - - {"0b0101 0o0707 0b17", TTs{NumericToken, NumericToken, NumericToken, NumericToken}}, - {"`template`", TTs{TemplateToken}}, - {"`a${x+y}b`", TTs{TemplateToken, IdentifierToken, PunctuatorToken, IdentifierToken, TemplateToken}}, - {"`temp\nlate`", TTs{TemplateToken}}, - {"`outer${{x: 10}}bar${ raw`nested${2}endnest` }end`", TTs{TemplateToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, TemplateToken, IdentifierToken, TemplateToken, NumericToken, TemplateToken, TemplateToken}}, - - // early endings - {"'string", TTs{StringToken}}, - {"'\n '\u2028", TTs{UnknownToken, LineTerminatorToken, UnknownToken, LineTerminatorToken}}, - {"'str\\\U00100000ing\\0'", TTs{StringToken}}, - {"'strin\\00g'", TTs{StringToken}}, - {"/*comment", TTs{SingleLineCommentToken}}, - {"a=/regexp", TTs{IdentifierToken, PunctuatorToken, RegexpToken}}, - {"\\u002", TTs{UnknownToken, IdentifierToken}}, - - // coverage - {"Ø a〉", TTs{IdentifierToken, IdentifierToken, UnknownToken}}, - {"0xg 0.f", TTs{NumericToken, IdentifierToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"0bg 0og", TTs{NumericToken, IdentifierToken, NumericToken, IdentifierToken}}, - {"\u00A0\uFEFF\u2000", TTs{}}, - {"\u2028\u2029", TTs{LineTerminatorToken}}, - {"\\u0029ident", TTs{IdentifierToken}}, - {"\\u{0029FEF}ident", TTs{IdentifierToken}}, - {"\\u{}", TTs{UnknownToken, IdentifierToken, PunctuatorToken, PunctuatorToken}}, - {"\\ugident", TTs{UnknownToken, IdentifierToken}}, - {"'str\u2028ing'", TTs{UnknownToken, IdentifierToken, LineTerminatorToken, IdentifierToken, StringToken}}, - {"a=/\\\n", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, UnknownToken, LineTerminatorToken}}, - {"a=/x/\u200C\u3009", TTs{IdentifierToken, PunctuatorToken, RegexpToken, UnknownToken}}, - {"a=/x\n", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, IdentifierToken, LineTerminatorToken}}, - - {"return /abc/;", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, - {"yield /abc/;", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, - {"a/b/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, IdentifierToken}}, - {"{}/1/g", TTs{PunctuatorToken, PunctuatorToken, RegexpToken}}, - {"i(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"if(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, - {"a.if(0)/1/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"while(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, - {"for(;;)/1/g", TTs{IdentifierToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, RegexpToken}}, - {"with(0)/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, RegexpToken}}, - {"this/1/g", TTs{IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"case /1/g:", TTs{IdentifierToken, RegexpToken, PunctuatorToken}}, - {"function f(){}/1/g", TTs{IdentifierToken, IdentifierToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, PunctuatorToken, RegexpToken}}, - {"this.return/1/g", TTs{IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"(a+b)/1/g", TTs{PunctuatorToken, IdentifierToken, PunctuatorToken, IdentifierToken, PunctuatorToken, PunctuatorToken, NumericToken, PunctuatorToken, IdentifierToken}}, - {"`\\``", TTs{TemplateToken}}, - {"`\\${ 1 }`", TTs{TemplateToken}}, - {"`\\\r\n`", TTs{TemplateToken}}, - - // go fuzz - {"`", TTs{UnknownToken}}, - } - - for _, tt := range tokenTests { - t.Run(tt.js, func(t *testing.T) { - l := NewLexer(bytes.NewBufferString(tt.js)) - i := 0 - j := 0 - for { - token, _ := l.Next() - j++ - if token == ErrorToken { - test.T(t, l.Err(), io.EOF) - test.T(t, i, len(tt.expected), "when error occurred we must be at the end") - break - } else if token == WhitespaceToken { - continue - } - if i < len(tt.expected) { - if token != tt.expected[i] { - test.String(t, token.String(), tt.expected[i].String(), "token types must match") - break - } - } else { - test.Fail(t, "index", i, "must not exceed expected token types size", len(tt.expected)) - break - } - i++ - } - }) - } - - test.T(t, WhitespaceToken.String(), "Whitespace") - test.T(t, TokenType(100).String(), "Invalid(100)") -} - -//////////////////////////////////////////////////////////////// - -func ExampleNewLexer() { - l := NewLexer(bytes.NewBufferString("var x = 'lorem ipsum';")) - out := "" - for { - tt, data := l.Next() - if tt == ErrorToken { - break - } - out += string(data) - } - fmt.Println(out) - // Output: var x = 'lorem ipsum'; -} |