diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/json')
-rw-r--r-- | vendor/github.com/tdewolff/parse/json/README.md | 81 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/json/parse.go | 307 | ||||
-rw-r--r-- | vendor/github.com/tdewolff/parse/json/parse_test.go | 159 |
3 files changed, 547 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/json/README.md b/vendor/github.com/tdewolff/parse/json/README.md new file mode 100644 index 0000000..7621ca9 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/json/README.md @@ -0,0 +1,81 @@ +# JSON [![GoDoc](http://godoc.org/github.com/tdewolff/parse/json?status.svg)](http://godoc.org/github.com/tdewolff/parse/json) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/json)](http://gocover.io/github.com/tdewolff/parse/json) + +This package is a JSON lexer (ECMA-404) written in [Go][1]. It follows the specification at [JSON](http://json.org/). The lexer takes an io.Reader and converts it into tokens until the EOF. + +## Installation +Run the following command + + go get github.com/tdewolff/parse/json + +or add the following import and run the project with `go get` + + import "github.com/tdewolff/parse/json" + +## Parser +### Usage +The following initializes a new Parser with io.Reader `r`: +``` go +p := json.NewParser(r) +``` + +To tokenize until EOF or an error, use: +``` go +for { + gt, text := p.Next() + switch gt { + case json.ErrorGrammar: + // error or EOF set in p.Err() + return + // ... + } +} +``` + +All grammars: +``` go +ErrorGrammar GrammarType = iota // extra grammar when errors occur +WhitespaceGrammar // space \t \r \n +LiteralGrammar // null true false +NumberGrammar +StringGrammar +StartObjectGrammar // { +EndObjectGrammar // } +StartArrayGrammar // [ +EndArrayGrammar // ] +``` + +### Examples +``` go +package main + +import ( + "os" + + "github.com/tdewolff/parse/json" +) + +// Tokenize JSON from stdin. +func main() { + p := json.NewParser(os.Stdin) + for { + gt, text := p.Next() + switch gt { + case json.ErrorGrammar: + if p.Err() != io.EOF { + fmt.Println("Error on line", p.Line(), ":", p.Err()) + } + return + case json.LiteralGrammar: + fmt.Println("Literal", string(text)) + case json.NumberGrammar: + fmt.Println("Number", string(text)) + // ... 
+ } + } +} +``` + +## License +Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). + +[1]: http://golang.org/ "Go Language" diff --git a/vendor/github.com/tdewolff/parse/json/parse.go b/vendor/github.com/tdewolff/parse/json/parse.go new file mode 100644 index 0000000..ae133f2 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/json/parse.go @@ -0,0 +1,307 @@ +// Package json is a JSON parser following the specifications at http://json.org/. +package json // import "github.com/tdewolff/parse/json" + +import ( + "io" + "strconv" + + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/buffer" +) + +// GrammarType determines the type of grammar +type GrammarType uint32 + +// GrammarType values. +const ( + ErrorGrammar GrammarType = iota // extra grammar when errors occur + WhitespaceGrammar + LiteralGrammar + NumberGrammar + StringGrammar + StartObjectGrammar // { + EndObjectGrammar // } + StartArrayGrammar // [ + EndArrayGrammar // ] +) + +// String returns the string representation of a GrammarType. +func (gt GrammarType) String() string { + switch gt { + case ErrorGrammar: + return "Error" + case WhitespaceGrammar: + return "Whitespace" + case LiteralGrammar: + return "Literal" + case NumberGrammar: + return "Number" + case StringGrammar: + return "String" + case StartObjectGrammar: + return "StartObject" + case EndObjectGrammar: + return "EndObject" + case StartArrayGrammar: + return "StartArray" + case EndArrayGrammar: + return "EndArray" + } + return "Invalid(" + strconv.Itoa(int(gt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// State determines the current state the parser is in. +type State uint32 + +// State values. +const ( + ValueState State = iota // extra token when errors occur + ObjectKeyState + ObjectValueState + ArrayState +) + +// String returns the string representation of a State. 
+func (state State) String() string { + switch state { + case ValueState: + return "Value" + case ObjectKeyState: + return "ObjectKey" + case ObjectValueState: + return "ObjectValue" + case ArrayState: + return "Array" + } + return "Invalid(" + strconv.Itoa(int(state)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Parser is the state for the lexer. +type Parser struct { + r *buffer.Lexer + state []State + err error + + needComma bool +} + +// NewParser returns a new Parser for a given io.Reader. +func NewParser(r io.Reader) *Parser { + return &Parser{ + r: buffer.NewLexer(r), + state: []State{ValueState}, + } +} + +// Err returns the error encountered during tokenization, this is often io.EOF but also other errors can be returned. +func (p *Parser) Err() error { + if err := p.r.Err(); err != nil { + return err + } + return p.err +} + +// Restore restores the NULL byte at the end of the buffer. +func (p *Parser) Restore() { + p.r.Restore() +} + +// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message. 
+func (p *Parser) Next() (GrammarType, []byte) { + p.moveWhitespace() + c := p.r.Peek(0) + state := p.state[len(p.state)-1] + if c == ',' { + if state != ArrayState && state != ObjectKeyState { + p.err = parse.NewErrorLexer("unexpected comma character outside an array or object", p.r) + return ErrorGrammar, nil + } + p.r.Move(1) + p.moveWhitespace() + p.needComma = false + c = p.r.Peek(0) + } + p.r.Skip() + + if p.needComma && c != '}' && c != ']' && c != 0 { + p.err = parse.NewErrorLexer("expected comma character or an array or object ending", p.r) + return ErrorGrammar, nil + } else if c == '{' { + p.state = append(p.state, ObjectKeyState) + p.r.Move(1) + return StartObjectGrammar, p.r.Shift() + } else if c == '}' { + if state != ObjectKeyState { + p.err = parse.NewErrorLexer("unexpected right brace character", p.r) + return ErrorGrammar, nil + } + p.needComma = true + p.state = p.state[:len(p.state)-1] + if p.state[len(p.state)-1] == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + p.r.Move(1) + return EndObjectGrammar, p.r.Shift() + } else if c == '[' { + p.state = append(p.state, ArrayState) + p.r.Move(1) + return StartArrayGrammar, p.r.Shift() + } else if c == ']' { + p.needComma = true + if state != ArrayState { + p.err = parse.NewErrorLexer("unexpected right bracket character", p.r) + return ErrorGrammar, nil + } + p.state = p.state[:len(p.state)-1] + if p.state[len(p.state)-1] == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + p.r.Move(1) + return EndArrayGrammar, p.r.Shift() + } else if state == ObjectKeyState { + if c != '"' || !p.consumeStringToken() { + p.err = parse.NewErrorLexer("expected object key to be a quoted string", p.r) + return ErrorGrammar, nil + } + n := p.r.Pos() + p.moveWhitespace() + if c := p.r.Peek(0); c != ':' { + p.err = parse.NewErrorLexer("expected colon character after object key", p.r) + return ErrorGrammar, nil + } + p.r.Move(1) + p.state[len(p.state)-1] = ObjectValueState + return 
StringGrammar, p.r.Shift()[:n] + } else { + p.needComma = true + if state == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + if c == '"' && p.consumeStringToken() { + return StringGrammar, p.r.Shift() + } else if p.consumeNumberToken() { + return NumberGrammar, p.r.Shift() + } else if p.consumeLiteralToken() { + return LiteralGrammar, p.r.Shift() + } + } + return ErrorGrammar, nil +} + +// State returns the state the parser is currently in (ie. which token is expected). +func (p *Parser) State() State { + return p.state[len(p.state)-1] +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the specifications at http://json.org/ +*/ + +func (p *Parser) moveWhitespace() { + for { + if c := p.r.Peek(0); c != ' ' && c != '\n' && c != '\r' && c != '\t' { + break + } + p.r.Move(1) + } +} + +func (p *Parser) consumeLiteralToken() bool { + c := p.r.Peek(0) + if c == 't' && p.r.Peek(1) == 'r' && p.r.Peek(2) == 'u' && p.r.Peek(3) == 'e' { + p.r.Move(4) + return true + } else if c == 'f' && p.r.Peek(1) == 'a' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 's' && p.r.Peek(4) == 'e' { + p.r.Move(5) + return true + } else if c == 'n' && p.r.Peek(1) == 'u' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 'l' { + p.r.Move(4) + return true + } + return false +} + +func (p *Parser) consumeNumberToken() bool { + mark := p.r.Pos() + if p.r.Peek(0) == '-' { + p.r.Move(1) + } + c := p.r.Peek(0) + if c >= '1' && c <= '9' { + p.r.Move(1) + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } else if c != '0' { + p.r.Rewind(mark) + return false + } else { + p.r.Move(1) // 0 + } + if c := p.r.Peek(0); c == '.' 
{ + p.r.Move(1) + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Move(-1) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + mark = p.r.Pos() + if c := p.r.Peek(0); c == 'e' || c == 'E' { + p.r.Move(1) + if c := p.r.Peek(0); c == '+' || c == '-' { + p.r.Move(1) + } + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Rewind(mark) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + return true +} + +func (p *Parser) consumeStringToken() bool { + // assume to be on " + p.r.Move(1) + for { + c := p.r.Peek(0) + if c == '"' { + escaped := false + for i := p.r.Pos() - 1; i >= 0; i-- { + if p.r.Lexeme()[i] == '\\' { + escaped = !escaped + } else { + break + } + } + if !escaped { + p.r.Move(1) + break + } + } else if c == 0 { + return false + } + p.r.Move(1) + } + return true +} diff --git a/vendor/github.com/tdewolff/parse/json/parse_test.go b/vendor/github.com/tdewolff/parse/json/parse_test.go new file mode 100644 index 0000000..6ea28d1 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/json/parse_test.go @@ -0,0 +1,159 @@ +package json // import "github.com/tdewolff/parse/json" + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/tdewolff/parse" + "github.com/tdewolff/test" +) + +type GTs []GrammarType + +func TestGrammars(t *testing.T) { + var grammarTests = []struct { + json string + expected []GrammarType + }{ + {" \t\n\r", GTs{}}, // WhitespaceGrammar + {"null", GTs{LiteralGrammar}}, + {"[]", GTs{StartArrayGrammar, EndArrayGrammar}}, + {"15.2", GTs{NumberGrammar}}, + {"0.4", GTs{NumberGrammar}}, + {"5e9", GTs{NumberGrammar}}, + {"-4E-3", GTs{NumberGrammar}}, + {"true", GTs{LiteralGrammar}}, + {"false", GTs{LiteralGrammar}}, + {"null", GTs{LiteralGrammar}}, + {`""`, GTs{StringGrammar}}, + {`"abc"`, GTs{StringGrammar}}, + {`"\""`, GTs{StringGrammar}}, + {`"\\"`, GTs{StringGrammar}}, + {"{}", GTs{StartObjectGrammar, EndObjectGrammar}}, + {`{"a": "b", 
"c": "d"}`, GTs{StartObjectGrammar, StringGrammar, StringGrammar, StringGrammar, StringGrammar, EndObjectGrammar}}, + {`{"a": [1, 2], "b": {"c": 3}}`, GTs{StartObjectGrammar, StringGrammar, StartArrayGrammar, NumberGrammar, NumberGrammar, EndArrayGrammar, StringGrammar, StartObjectGrammar, StringGrammar, NumberGrammar, EndObjectGrammar, EndObjectGrammar}}, + {"[null,]", GTs{StartArrayGrammar, LiteralGrammar, EndArrayGrammar}}, + // {"[\"x\\\x00y\", 0]", GTs{StartArrayGrammar, StringGrammar, NumberGrammar, EndArrayGrammar}}, + } + for _, tt := range grammarTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + i := 0 + for { + grammar, _ := p.Next() + if grammar == ErrorGrammar { + test.T(t, p.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if grammar == WhitespaceGrammar { + continue + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected grammar types size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, grammar, tt.expected[i], "grammar types must match") + } + i++ + } + }) + } + + test.T(t, WhitespaceGrammar.String(), "Whitespace") + test.T(t, GrammarType(100).String(), "Invalid(100)") + test.T(t, ValueState.String(), "Value") + test.T(t, ObjectKeyState.String(), "ObjectKey") + test.T(t, ObjectValueState.String(), "ObjectValue") + test.T(t, ArrayState.String(), "Array") + test.T(t, State(100).String(), "Invalid(100)") +} + +func TestGrammarsError(t *testing.T) { + var grammarErrorTests = []struct { + json string + col int + }{ + {"true, false", 5}, + {"[true false]", 7}, + {"]", 1}, + {"}", 1}, + {"{0: 1}", 2}, + {"{\"a\" 1}", 6}, + {"1.", 2}, + {"1e+", 2}, + {`{"":"`, 0}, + {"\"a\\", 0}, + } + for _, tt := range grammarErrorTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + for { + grammar, _ := p.Next() + if grammar == ErrorGrammar { + if tt.col == 0 { + test.T(t, p.Err(), 
io.EOF) + } else if perr, ok := p.Err().(*parse.Error); ok { + test.T(t, perr.Col, tt.col) + } else { + test.Fail(t, "bad error:", p.Err()) + } + break + } + } + }) + } +} + +func TestStates(t *testing.T) { + var stateTests = []struct { + json string + expected []State + }{ + {"null", []State{ValueState}}, + {"[null]", []State{ArrayState, ArrayState, ValueState}}, + {"{\"\":null}", []State{ObjectKeyState, ObjectValueState, ObjectKeyState, ValueState}}, + } + for _, tt := range stateTests { + t.Run(tt.json, func(t *testing.T) { + p := NewParser(bytes.NewBufferString(tt.json)) + i := 0 + for { + grammar, _ := p.Next() + state := p.State() + if grammar == ErrorGrammar { + test.T(t, p.Err(), io.EOF) + test.T(t, i, len(tt.expected), "when error occurred we must be at the end") + break + } else if grammar == WhitespaceGrammar { + continue + } + test.That(t, i < len(tt.expected), "index", i, "must not exceed expected states size", len(tt.expected)) + if i < len(tt.expected) { + test.T(t, state, tt.expected[i], "states must match") + } + i++ + } + }) + } +} + +//////////////////////////////////////////////////////////////// + +func ExampleNewParser() { + p := NewParser(bytes.NewBufferString(`{"key": 5}`)) + out := "" + for { + state := p.State() + gt, data := p.Next() + if gt == ErrorGrammar { + break + } + out += string(data) + if state == ObjectKeyState && gt != EndObjectGrammar { + out += ":" + } + // not handling comma insertion + } + fmt.Println(out) + // Output: {"key":5} +} |