diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/json/parse.go')
-rw-r--r-- | vendor/github.com/tdewolff/parse/json/parse.go | 307 |
1 files changed, 307 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/json/parse.go b/vendor/github.com/tdewolff/parse/json/parse.go new file mode 100644 index 0000000..ae133f2 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/json/parse.go @@ -0,0 +1,307 @@ +// Package json is a JSON parser following the specifications at http://json.org/. +package json // import "github.com/tdewolff/parse/json" + +import ( + "io" + "strconv" + + "github.com/tdewolff/parse" + "github.com/tdewolff/parse/buffer" +) + +// GrammarType determines the type of grammar +type GrammarType uint32 + +// GrammarType values. +const ( + ErrorGrammar GrammarType = iota // extra grammar when errors occur + WhitespaceGrammar + LiteralGrammar + NumberGrammar + StringGrammar + StartObjectGrammar // { + EndObjectGrammar // } + StartArrayGrammar // [ + EndArrayGrammar // ] +) + +// String returns the string representation of a GrammarType. +func (gt GrammarType) String() string { + switch gt { + case ErrorGrammar: + return "Error" + case WhitespaceGrammar: + return "Whitespace" + case LiteralGrammar: + return "Literal" + case NumberGrammar: + return "Number" + case StringGrammar: + return "String" + case StartObjectGrammar: + return "StartObject" + case EndObjectGrammar: + return "EndObject" + case StartArrayGrammar: + return "StartArray" + case EndArrayGrammar: + return "EndArray" + } + return "Invalid(" + strconv.Itoa(int(gt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// State determines the current state the parser is in. +type State uint32 + +// State values. +const ( + ValueState State = iota // extra token when errors occur + ObjectKeyState + ObjectValueState + ArrayState +) + +// String returns the string representation of a State. +func (state State) String() string { + switch state { + case ValueState: + return "Value" + case ObjectKeyState: + return "ObjectKey" + case ObjectValueState: + return "ObjectValue" + case ArrayState: + return "Array" + } + return "Invalid(" + strconv.Itoa(int(state)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Parser is the state for the lexer. +type Parser struct { + r *buffer.Lexer + state []State + err error + + needComma bool +} + +// NewParser returns a new Parser for a given io.Reader. +func NewParser(r io.Reader) *Parser { + return &Parser{ + r: buffer.NewLexer(r), + state: []State{ValueState}, + } +} + +// Err returns the error encountered during tokenization, this is often io.EOF but also other errors can be returned. +func (p *Parser) Err() error { + if err := p.r.Err(); err != nil { + return err + } + return p.err +} + +// Restore restores the NULL byte at the end of the buffer. +func (p *Parser) Restore() { + p.r.Restore() +} + +// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message. +func (p *Parser) Next() (GrammarType, []byte) { + p.moveWhitespace() + c := p.r.Peek(0) + state := p.state[len(p.state)-1] + if c == ',' { + if state != ArrayState && state != ObjectKeyState { + p.err = parse.NewErrorLexer("unexpected comma character outside an array or object", p.r) + return ErrorGrammar, nil + } + p.r.Move(1) + p.moveWhitespace() + p.needComma = false + c = p.r.Peek(0) + } + p.r.Skip() + + if p.needComma && c != '}' && c != ']' && c != 0 { + p.err = parse.NewErrorLexer("expected comma character or an array or object ending", p.r) + return ErrorGrammar, nil + } else if c == '{' { + p.state = append(p.state, ObjectKeyState) + p.r.Move(1) + return StartObjectGrammar, p.r.Shift() + } else if c == '}' { + if state != ObjectKeyState { + p.err = parse.NewErrorLexer("unexpected right brace character", p.r) + return ErrorGrammar, nil + } + p.needComma = true + p.state = p.state[:len(p.state)-1] + if p.state[len(p.state)-1] == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + p.r.Move(1) + return EndObjectGrammar, p.r.Shift() + } else if c == '[' { + p.state = append(p.state, ArrayState) + p.r.Move(1) + return StartArrayGrammar, p.r.Shift() + } else if c == ']' { + p.needComma = true + if state != ArrayState { + p.err = parse.NewErrorLexer("unexpected right bracket character", p.r) + return ErrorGrammar, nil + } + p.state = p.state[:len(p.state)-1] + if p.state[len(p.state)-1] == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + p.r.Move(1) + return EndArrayGrammar, p.r.Shift() + } else if state == ObjectKeyState { + if c != '"' || !p.consumeStringToken() { + p.err = parse.NewErrorLexer("expected object key to be a quoted string", p.r) + return ErrorGrammar, nil + } + n := p.r.Pos() + p.moveWhitespace() + if c := p.r.Peek(0); c != ':' { + p.err = parse.NewErrorLexer("expected colon character after object key", p.r) + return ErrorGrammar, nil + } + p.r.Move(1) + p.state[len(p.state)-1] = ObjectValueState + return StringGrammar, p.r.Shift()[:n] + } else { + p.needComma = true + if state == ObjectValueState { + p.state[len(p.state)-1] = ObjectKeyState + } + if c == '"' && p.consumeStringToken() { + return StringGrammar, p.r.Shift() + } else if p.consumeNumberToken() { + return NumberGrammar, p.r.Shift() + } else if p.consumeLiteralToken() { + return LiteralGrammar, p.r.Shift() + } + } + return ErrorGrammar, nil +} + +// State returns the state the parser is currently in (ie. which token is expected). +func (p *Parser) State() State { + return p.state[len(p.state)-1] +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the specifications at http://json.org/ +*/ + +func (p *Parser) moveWhitespace() { + for { + if c := p.r.Peek(0); c != ' ' && c != '\n' && c != '\r' && c != '\t' { + break + } + p.r.Move(1) + } +} + +func (p *Parser) consumeLiteralToken() bool { + c := p.r.Peek(0) + if c == 't' && p.r.Peek(1) == 'r' && p.r.Peek(2) == 'u' && p.r.Peek(3) == 'e' { + p.r.Move(4) + return true + } else if c == 'f' && p.r.Peek(1) == 'a' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 's' && p.r.Peek(4) == 'e' { + p.r.Move(5) + return true + } else if c == 'n' && p.r.Peek(1) == 'u' && p.r.Peek(2) == 'l' && p.r.Peek(3) == 'l' { + p.r.Move(4) + return true + } + return false +} + +func (p *Parser) consumeNumberToken() bool { + mark := p.r.Pos() + if p.r.Peek(0) == '-' { + p.r.Move(1) + } + c := p.r.Peek(0) + if c >= '1' && c <= '9' { + p.r.Move(1) + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } else if c != '0' { + p.r.Rewind(mark) + return false + } else { + p.r.Move(1) // 0 + } + if c := p.r.Peek(0); c == '.' { + p.r.Move(1) + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Move(-1) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + mark = p.r.Pos() + if c := p.r.Peek(0); c == 'e' || c == 'E' { + p.r.Move(1) + if c := p.r.Peek(0); c == '+' || c == '-' { + p.r.Move(1) + } + if c := p.r.Peek(0); c < '0' || c > '9' { + p.r.Rewind(mark) + return true + } + for { + if c := p.r.Peek(0); c < '0' || c > '9' { + break + } + p.r.Move(1) + } + } + return true +} + +func (p *Parser) consumeStringToken() bool { + // assume to be on " + p.r.Move(1) + for { + c := p.r.Peek(0) + if c == '"' { + escaped := false + for i := p.r.Pos() - 1; i >= 0; i-- { + if p.r.Lexeme()[i] == '\\' { + escaped = !escaped + } else { + break + } + } + if !escaped { + p.r.Move(1) + break + } + } else if c == 0 { + return false + } + p.r.Move(1) + } + return true +} |