diff options
Diffstat (limited to 'third_party/protobuf/3.2.0/csharp/src/Google.Protobuf/JsonTokenizer.cs')
-rw-r--r-- | third_party/protobuf/3.2.0/csharp/src/Google.Protobuf/JsonTokenizer.cs | 738 |
1 files changed, 0 insertions, 738 deletions
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;

namespace Google.Protobuf
{
    /// <summary>
    /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
    /// It does not create tokens for the separator between names and values, or for the comma
    /// between values. It validates the token stream as it goes - so callers can assume that the
    /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
    /// </para>
    /// <para>Implementation details: the base class handles single token push-back and object depth
    /// tracking; derived classes only supply the raw token sequence via <see cref="NextImpl"/>.</para>
    /// <para>Not thread-safe.</para>
    /// </remarks>
    internal abstract class JsonTokenizer
    {
        // The single token that has been pushed back (if any); returned by the next call to Next().
        private JsonToken bufferedToken;

        /// <summary>
        ///  Creates a tokenizer that reads from the given text reader.
        /// </summary>
        internal static JsonTokenizer FromTextReader(TextReader reader)
        {
            return new JsonTextTokenizer(reader);
        }

        /// <summary>
        /// Creates a tokenizer that first replays the given list of tokens, then continues reading
        /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
        /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
        /// created for the sake of Any parsing.
        /// </summary>
        internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
        {
            return new JsonReplayTokenizer(tokens, continuation);
        }

        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
        /// </summary>
        internal int ObjectDepth { get; private set; }

        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
        /// <summary>
        /// Buffers a single token so that it is returned by the next call to <see cref="Next"/>.
        /// The object depth is adjusted in the opposite direction to <see cref="Next"/>, so that
        /// re-reading the token restores it.
        /// </summary>
        /// <param name="token">The token to return from the next call to <see cref="Next"/>.</param>
        /// <exception cref="InvalidOperationException">A token is already buffered.</exception>
        internal void PushBack(JsonToken token)
        {
            if (bufferedToken != null)
            {
                throw new InvalidOperationException("Can't push back twice");
            }
            bufferedToken = token;
            // Undo the depth change that Next() applied when this token was handed out.
            if (token.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth--;
            }
            else if (token.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth++;
            }
        }

        /// <summary>
        /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
        /// after which point <c>Next()</c> should not be called again.
        /// </summary>
        /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
        /// <returns>The next token in the stream. This is never null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        internal JsonToken Next()
        {
            JsonToken tokenToReturn;
            if (bufferedToken != null)
            {
                tokenToReturn = bufferedToken;
                bufferedToken = null;
            }
            else
            {
                tokenToReturn = NextImpl();
            }
            // Depth is tracked here (rather than in NextImpl) so that pushed-back tokens are counted too.
            if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth++;
            }
            else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth--;
            }
            return tokenToReturn;
        }

        /// <summary>
        /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
        /// to this if it doesn't have a buffered token.)
        /// </summary>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        protected abstract JsonToken NextImpl();

        /// <summary>
        /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
        /// </summary>
        private class JsonReplayTokenizer : JsonTokenizer
        {
            private readonly IList<JsonToken> tokens;
            private readonly JsonTokenizer nextTokenizer;
            // Index of the next token in "tokens" to replay.
            private int nextTokenIndex;

            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
            {
                this.tokens = tokens;
                this.nextTokenizer = nextTokenizer;
            }

            // FIXME: Object depth not maintained...
            /// <summary>
            /// Returns the next replayed token, or delegates to the continuation tokenizer
            /// once every replayed token has been returned.
            /// </summary>
            protected override JsonToken NextImpl()
            {
                if (nextTokenIndex >= tokens.Count)
                {
                    return nextTokenizer.Next();
                }
                return tokens[nextTokenIndex++];
            }
        }

        /// <summary>
        /// Tokenizer which does all the *real* work of parsing JSON.
        /// </summary>
        private sealed class JsonTextTokenizer : JsonTokenizer
        {
            // The set of states in which a value is valid next token.
            private static readonly State ValueStates =
                State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;

            // Tracks the containers we are currently nested in; the bottom entry is always Document.
            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
            private readonly PushBackReader reader;
            private State state;

            internal JsonTextTokenizer(TextReader reader)
            {
                this.reader = new PushBackReader(reader);
                state = State.StartOfDocument;
                containerStack.Push(ContainerType.Document);
            }

            /// <remarks>
            /// This method essentially just loops through characters skipping whitespace, validating and
            /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
            /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
            /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
            /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
            /// </remarks>
            protected override JsonToken NextImpl()
            {
                if (state == State.ReaderExhausted)
                {
                    throw new InvalidOperationException("Next() called after end of document");
                }
                while (true)
                {
                    var next = reader.Read();
                    if (next == null)
                    {
                        ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
                        state = State.ReaderExhausted;
                        return JsonToken.EndDocument;
                    }
                    switch (next.Value)
                    {
                        // Skip whitespace between tokens
                        case ' ':
                        case '\t':
                        case '\r':
                        case '\n':
                            break;
                        case ':':
                            ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                            state = State.ObjectAfterColon;
                            break;
                        case ',':
                            ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                            state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                            break;
                        case '"':
                            string stringValue = ReadString();
                            // A string is a property name when a name is expected, otherwise it is a value.
                            if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                            {
                                state = State.ObjectBeforeColon;
                                return JsonToken.Name(stringValue);
                            }
                            else
                            {
                                ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                                return JsonToken.Value(stringValue);
                            }
                        case '{':
                            ValidateState(ValueStates, "Invalid state to read an open brace: ");
                            state = State.ObjectStart;
                            containerStack.Push(ContainerType.Object);
                            return JsonToken.StartObject;
                        case '}':
                            ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                            PopContainer();
                            return JsonToken.EndObject;
                        case '[':
                            ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                            state = State.ArrayStart;
                            containerStack.Push(ContainerType.Array);
                            return JsonToken.StartArray;
                        case ']':
                            ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                            PopContainer();
                            return JsonToken.EndArray;
                        case 'n': // Start of null
                            ConsumeLiteral("null");
                            ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                            return JsonToken.Null;
                        case 't': // Start of true
                            ConsumeLiteral("true");
                            ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                            return JsonToken.True;
                        case 'f': // Start of false
                            ConsumeLiteral("false");
                            ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                            return JsonToken.False;
                        case '-': // Start of a number
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            double number = ReadNumber(next.Value);
                            ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                            return JsonToken.Value(number);
                        default:
                            // Created via the reader, like every other parse error in this class.
                            throw reader.CreateException("Invalid first character of token: " + next.Value);
                    }
                }
            }

            /// <summary>
            /// Throws an exception if the current state is not one of the given valid states.
            /// </summary>
            /// <param name="validStates">Bitwise combination of the states that are acceptable.</param>
            /// <param name="errorPrefix">Prefix of the error message; the current state is appended to it.</param>
            private void ValidateState(State validStates, string errorPrefix)
            {
                if ((validStates & state) == 0)
                {
                    throw reader.CreateException(errorPrefix + state);
                }
            }

            /// <summary>
            /// Reads a string token. It is assumed that the opening " has already been read.
            /// </summary>
            private string ReadString()
            {
                var value = new StringBuilder();
                bool haveHighSurrogate = false;
                while (true)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading string");
                    if (c < ' ')
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c));
                    }
                    if (c == '"')
                    {
                        if (haveHighSurrogate)
                        {
                            throw reader.CreateException("Invalid use of surrogate pair code units");
                        }
                        return value.ToString();
                    }
                    if (c == '\\')
                    {
                        c = ReadEscapedCharacter();
                    }
                    // TODO: Consider only allowing surrogate pairs that are either both escaped,
                    // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
                    // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
                    // A low surrogate must appear exactly when the previous code unit was a high surrogate.
                    if (haveHighSurrogate != char.IsLowSurrogate(c))
                    {
                        throw reader.CreateException("Invalid use of surrogate pair code units");
                    }
                    haveHighSurrogate = char.IsHighSurrogate(c);
                    value.Append(c);
                }
            }

            /// <summary>
            /// Reads an escaped character. It is assumed that the leading backslash has already been read.
            /// </summary>
            private char ReadEscapedCharacter()
            {
                char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
                switch (c)
                {
                    case 'n':
                        return '\n';
                    case '\\':
                        return '\\';
                    case 'b':
                        return '\b';
                    case 'f':
                        return '\f';
                    case 'r':
                        return '\r';
                    case 't':
                        return '\t';
                    case '"':
                        return '"';
                    case '/':
                        return '/';
                    case 'u':
                        return ReadUnicodeEscape();
                    default:
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
                }
            }

            /// <summary>
            /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read.
            /// </summary>
            private char ReadUnicodeEscape()
            {
                int result = 0;
                for (int i = 0; i < 4; i++)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
                    int nybble;
                    if (c >= '0' && c <= '9')
                    {
                        nybble = c - '0';
                    }
                    else if (c >= 'a' && c <= 'f')
                    {
                        nybble = c - 'a' + 10;
                    }
                    else if (c >= 'A' && c <= 'F')
                    {
                        nybble = c - 'A' + 10;
                    }
                    else
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
                    }
                    result = (result << 4) + nybble;
                }
                return (char) result;
            }

            /// <summary>
            /// Consumes a text-only literal, throwing an exception if the read text doesn't match it.
            /// It is assumed that the first letter of the literal has already been read.
            /// </summary>
            private void ConsumeLiteral(string text)
            {
                // Start at 1: the first character was consumed by the caller.
                for (int i = 1; i < text.Length; i++)
                {
                    char? next = reader.Read();
                    if (next == null)
                    {
                        throw reader.CreateException("Unexpected end of text while reading literal token " + text);
                    }
                    if (next.Value != text[i])
                    {
                        throw reader.CreateException("Unexpected character while reading literal token " + text);
                    }
                }
            }

            /// <summary>
            /// Reads a numeric literal whose first character has already been read.
            /// </summary>
            /// <param name="initialCharacter">The already-consumed first character: '-' or a digit.</param>
            /// <returns>The parsed value, which is always finite.</returns>
            /// <exception cref="InvalidJsonException">The literal is malformed, or its value cannot be
            /// represented as a finite double.</exception>
            private double ReadNumber(char initialCharacter)
            {
                StringBuilder builder = new StringBuilder();
                if (initialCharacter == '-')
                {
                    builder.Append("-");
                }
                else
                {
                    // Let ReadInt see the first digit so it can apply the leading-zero check.
                    reader.PushBack(initialCharacter);
                }
                // Each method returns the character it read that doesn't belong in that part,
                // so we know what to do next, including pushing the character back at the end.
                // null is returned for "end of text".
                char? next = ReadInt(builder);
                if (next == '.')
                {
                    next = ReadFrac(builder);
                }
                if (next == 'e' || next == 'E')
                {
                    next = ReadExp(builder);
                }
                // If we read a character which wasn't part of the number, push it back so we can read it again
                // to parse the next token.
                if (next != null)
                {
                    reader.PushBack(next.Value);
                }

                // TODO: What exception should we throw if the value can't be represented as a double?
                try
                {
                    double result = double.Parse(builder.ToString(),
                        NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                        CultureInfo.InvariantCulture);
                    // Newer runtimes (.NET Core 3.0 onwards) return an infinity for out-of-range input
                    // instead of throwing OverflowException; report both cases in the same way.
                    if (double.IsInfinity(result))
                    {
                        throw reader.CreateException("Numeric value out of range: " + builder);
                    }
                    return result;
                }
                catch (OverflowException)
                {
                    throw reader.CreateException("Numeric value out of range: " + builder);
                }
            }

            /// <summary>
            /// Reads the integer part of a number (without any sign), appending it to the builder.
            /// </summary>
            /// <returns>The first character read which isn't part of the integer, or null at the end of the text.</returns>
            private char? ReadInt(StringBuilder builder)
            {
                char first = reader.ReadOrFail("Invalid numeric literal");
                if (first < '0' || first > '9')
                {
                    throw reader.CreateException("Invalid numeric literal");
                }
                builder.Append(first);
                int digitCount;
                char? next = ConsumeDigits(builder, out digitCount);
                // "0" is fine on its own, but may not be followed by further digits.
                if (first == '0' && digitCount != 0)
                {
                    throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
                }
                return next;
            }

            /// <summary>
            /// Reads the fractional part of a number, appending it (including the '.') to the builder.
            /// It is assumed that the '.' has already been read.
            /// </summary>
            /// <returns>The first character read which isn't part of the fraction, or null at the end of the text.</returns>
            private char? ReadFrac(StringBuilder builder)
            {
                builder.Append('.'); // Already consumed this
                int digitCount;
                char? next = ConsumeDigits(builder, out digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
                }
                return next;
            }

            /// <summary>
            /// Reads the exponent part of a number, appending it (including an 'E') to the builder.
            /// It is assumed that the 'e' or 'E' has already been read.
            /// </summary>
            /// <returns>The first character read which isn't part of the exponent, or null at the end of the text.</returns>
            private char? ReadExp(StringBuilder builder)
            {
                builder.Append('E'); // Already consumed this (or 'e')
                char? next = reader.Read();
                if (next == null)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
                }
                if (next == '-' || next == '+')
                {
                    builder.Append(next.Value);
                }
                else
                {
                    // Not a sign, so it should be the first digit; let ConsumeDigits read it.
                    reader.PushBack(next.Value);
                }
                int digitCount;
                next = ConsumeDigits(builder, out digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent without value");
                }
                return next;
            }

            /// <summary>
            /// Reads consecutive ASCII digits, appending each one to the builder.
            /// </summary>
            /// <param name="builder">The builder to append the digits to.</param>
            /// <param name="count">The number of digits which were read.</param>
            /// <returns>The first non-digit character read, or null at the end of the text.
            /// The character is not pushed back; the caller is responsible for that.</returns>
            private char? ConsumeDigits(StringBuilder builder, out int count)
            {
                count = 0;
                while (true)
                {
                    char? next = reader.Read();
                    if (next == null || next.Value < '0' || next.Value > '9')
                    {
                        return next;
                    }
                    count++;
                    builder.Append(next.Value);
                }
            }

            /// <summary>
            /// Validates that we're in a valid state to read a value (using the given error prefix if necessary)
            /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty.
            /// </summary>
            private void ValidateAndModifyStateForValue(string errorPrefix)
            {
                ValidateState(ValueStates, errorPrefix);
                switch (state)
                {
                    case State.StartOfDocument:
                        state = State.ExpectedEndOfDocument;
                        return;
                    case State.ObjectAfterColon:
                        state = State.ObjectAfterProperty;
                        return;
                    case State.ArrayStart:
                    case State.ArrayAfterComma:
                        state = State.ArrayAfterValue;
                        return;
                    default:
                        throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
                }
            }

            /// <summary>
            /// Pops the top-most container, and sets the state to the appropriate one for the end of a value
            /// in the parent container.
            /// </summary>
            private void PopContainer()
            {
                containerStack.Pop();
                var parent = containerStack.Peek();
                switch (parent)
                {
                    case ContainerType.Object:
                        state = State.ObjectAfterProperty;
                        break;
                    case ContainerType.Array:
                        state = State.ArrayAfterValue;
                        break;
                    case ContainerType.Document:
                        state = State.ExpectedEndOfDocument;
                        break;
                    default:
                        throw new InvalidOperationException("Unexpected container type: " + parent);
                }
            }

            /// <summary>
            /// The kinds of container tracked on the container stack.
            /// </summary>
            private enum ContainerType
            {
                Document, Object, Array
            }

            /// <summary>
            /// Possible states of the tokenizer.
            /// </summary>
            /// <remarks>
            /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states
            /// for checking.</para>
            /// <para>
            /// Each is documented with an example,
            /// where ^ represents the current position within the text stream. The examples all use string values,
            /// but could be any value, including nested objects/arrays.
            /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects).
            /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which
            /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
            /// </para>
            /// <para>
            /// These states were derived manually by reading RFC 7159 carefully.
            /// </para>
            /// </remarks>
            [Flags]
            private enum State
            {
                /// <summary>
                /// ^ { "foo": "bar" }
                /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
                /// </summary>
                StartOfDocument = 1 << 0,
                /// <summary>
                /// { "foo": "bar" } ^
                /// After the value in a document. Next states: ReaderExhausted
                /// </summary>
                ExpectedEndOfDocument = 1 << 1,
                /// <summary>
                /// { "foo": "bar" } ^ (and already read to the end of the reader)
                /// Terminal state.
                /// </summary>
                ReaderExhausted = 1 << 2,
                /// <summary>
                /// { ^ "foo": "bar" }
                /// Before the *first* property in an object.
                /// Next states:
                /// "AfterValue" (empty object)
                /// ObjectBeforeColon (read a name)
                /// </summary>
                ObjectStart = 1 << 3,
                /// <summary>
                /// { "foo" ^ : "bar", "x": "y" }
                /// Next state: ObjectAfterColon
                /// </summary>
                ObjectBeforeColon = 1 << 4,
                /// <summary>
                /// { "foo" : ^ "bar", "x": "y" }
                /// After the colon which follows a property name, so expecting the value of that property.
                /// Next states:
                /// "AfterValue" (value is simple)
                /// ObjectStart (value is object)
                /// ArrayStart (value is array)
                /// </summary>
                ObjectAfterColon = 1 << 5,
                /// <summary>
                /// { "foo" : "bar" ^ , "x" : "y" }
                /// At the end of a property, so expecting either a comma or end-of-object
                /// Next states: ObjectAfterComma or "AfterValue"
                /// </summary>
                ObjectAfterProperty = 1 << 6,
                /// <summary>
                /// { "foo":"bar", ^ "x":"y" }
                /// Read the comma after the previous property, so expecting another property.
                /// This is like ObjectStart, but closing brace isn't valid here
                /// Next state: ObjectBeforeColon.
                /// </summary>
                ObjectAfterComma = 1 << 7,
                /// <summary>
                /// [ ^ "foo", "bar" ]
                /// Before the *first* value in an array.
                /// Next states:
                /// "AfterValue" (read a value)
                /// "AfterValue" (end of array; will pop stack)
                /// </summary>
                ArrayStart = 1 << 8,
                /// <summary>
                /// [ "foo" ^ , "bar" ]
                /// After any value in an array, so expecting either a comma or end-of-array
                /// Next states: ArrayAfterComma or "AfterValue"
                /// </summary>
                ArrayAfterValue = 1 << 9,
                /// <summary>
                /// [ "foo", ^ "bar" ]
                /// After a comma in an array, so there *must* be another value (simple or complex).
                /// Next states: "AfterValue" (simple value), StartObject, StartArray
                /// </summary>
                ArrayAfterComma = 1 << 10
            }

            /// <summary>
            /// Wrapper around a text reader allowing small amounts of buffering and location handling.
            /// </summary>
            private class PushBackReader
            {
                // TODO: Add locations for errors etc.

                private readonly TextReader reader;

                internal PushBackReader(TextReader reader)
                {
                    // TODO: Wrap the reader in a BufferedReader?
                    this.reader = reader;
                }

                /// <summary>
                /// The buffered next character, if we have one.
                /// </summary>
                private char? nextChar;

                /// <summary>
                /// Returns the next character in the stream, or null if we have reached the end.
                /// </summary>
                /// <returns>The pushed-back character if there is one, otherwise the next character from
                /// the underlying reader, or null at the end of the text.</returns>
                internal char? Read()
                {
                    if (nextChar != null)
                    {
                        char? tmp = nextChar;
                        nextChar = null;
                        return tmp;
                    }
                    int next = reader.Read();
                    return next == -1 ? null : (char?) next;
                }

                /// <summary>
                /// Returns the next character in the stream, throwing an exception with the given
                /// message if we have reached the end.
                /// </summary>
                internal char ReadOrFail(string messageOnFailure)
                {
                    char? next = Read();
                    if (next == null)
                    {
                        throw CreateException(messageOnFailure);
                    }
                    return next.Value;
                }

                /// <summary>
                /// Buffers a single character so that it is returned by the next call to <see cref="Read"/>.
                /// </summary>
                /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
                internal void PushBack(char c)
                {
                    if (nextChar != null)
                    {
                        throw new InvalidOperationException("Cannot push back when already buffering a character");
                    }
                    nextChar = c;
                }

                /// <summary>
                /// Creates a new exception appropriate for the current state of the reader.
                /// </summary>
                internal InvalidJsonException CreateException(string message)
                {
                    // TODO: Keep track of and use the location.
                    return new InvalidJsonException(message);
                }
            }
        }
    }
}