diff options
author | Jon Skeet <jonskeet@google.com> | 2015-11-23 16:21:47 +0000 |
---|---|---|
committer | Jon Skeet <jonskeet@google.com> | 2015-12-02 07:54:34 +0000 |
commit | 3de2fced6be1cc5e8f321c5aee2bb43176be962a (patch) | |
tree | 2942a336c3eb43c792520bbfd75006f2b2f4c6a6 /csharp | |
parent | 567579b50517e4f7efc459ab1d9d5ee2577af024 (diff) |
Handle JSON parsing for Any.
This required a rework of the tokenizer to allow for a "replaying" tokenizer, basically in case the @type value comes after the data itself. This rework is nice in some ways (all the pushback and object depth logic in one place) but is a little fragile in terms of token push-back when using the replay tokenizer. It'll be fine for the scenario we need it for, but we should be careful...
Diffstat (limited to 'csharp')
-rw-r--r-- | csharp/src/Google.Protobuf.Test/JsonParserTest.cs | 50 |
-rw-r--r-- | csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs | 14 |
-rw-r--r-- | csharp/src/Google.Protobuf/JsonFormatter.cs | 11 |
-rw-r--r-- | csharp/src/Google.Protobuf/JsonParser.cs | 149 |
-rw-r--r-- | csharp/src/Google.Protobuf/JsonTokenizer.cs | 1046 |
5 files changed, 761 insertions, 509 deletions
diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs index b3664770..874489e4 100644 --- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs @@ -30,6 +30,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endregion +using Google.Protobuf.Reflection; using Google.Protobuf.TestProtos; using Google.Protobuf.WellKnownTypes; using NUnit.Framework; @@ -718,6 +719,55 @@ namespace Google.Protobuf } [Test] + public void Any_RegularMessage() + { + var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, TypeRegistry.FromMessages(TestAllTypes.Descriptor))); + var message = new TestAllTypes { SingleInt32 = 10, SingleNestedMessage = new TestAllTypes.Types.NestedMessage { Bb = 20 } }; + var original = Any.Pack(message); + var json = formatter.Format(original); // This is tested in JsonFormatterTest + var parser = new JsonParser(new JsonParser.Settings(10, registry)); + Assert.AreEqual(original, parser.Parse<Any>(json)); + string valueFirstJson = "{ \"singleInt32\": 10, \"singleNestedMessage\": { \"bb\": 20 }, \"@type\": \"type.googleapis.com/protobuf_unittest.TestAllTypes\" }"; + Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson)); + } + + [Test] + public void Any_UnknownType() + { + string json = "{ \"@type\": \"type.googleapis.com/bogus\" }"; + Assert.Throws<InvalidOperationException>(() => Any.Parser.ParseJson(json)); + } + + [Test] + public void Any_WellKnownType() + { + var registry = TypeRegistry.FromMessages(Timestamp.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry)); + var timestamp = new DateTime(1673, 6, 19, 12, 34, 56, DateTimeKind.Utc).ToTimestamp(); + var original = Any.Pack(timestamp); + var json = formatter.Format(original); // This is tested in JsonFormatterTest + var parser = new 
JsonParser(new JsonParser.Settings(10, registry)); + Assert.AreEqual(original, parser.Parse<Any>(json)); + string valueFirstJson = "{ \"value\": \"1673-06-19T12:34:56Z\", \"@type\": \"type.googleapis.com/google.protobuf.Timestamp\" }"; + Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson)); + } + + [Test] + public void Any_Nested() + { + var registry = TypeRegistry.FromMessages(TestWellKnownTypes.Descriptor, TestAllTypes.Descriptor); + var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry)); + var parser = new JsonParser(new JsonParser.Settings(10, registry)); + var doubleNestedMessage = new TestAllTypes { SingleInt32 = 20 }; + var nestedMessage = Any.Pack(doubleNestedMessage); + var message = new TestWellKnownTypes { AnyField = Any.Pack(nestedMessage) }; + var json = formatter.Format(message); + // Use the descriptor-based parser just for a change. + Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor)); + } + + [Test] public void DataAfterObject() { string json = "{} 10"; diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs index a38efeed..a0a62227 100644 --- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs @@ -85,7 +85,7 @@ namespace Google.Protobuf public void ObjectDepth() { string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }"; - var tokenizer = new JsonTokenizer(new StringReader(json)); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it. 
Assert.AreEqual(0, tokenizer.ObjectDepth); Assert.AreEqual(JsonToken.StartObject, tokenizer.Next()); @@ -118,7 +118,7 @@ namespace Google.Protobuf public void ObjectDepth_WithPushBack() { string json = "{}"; - var tokenizer = new JsonTokenizer(new StringReader(json)); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json)); Assert.AreEqual(0, tokenizer.ObjectDepth); var token = tokenizer.Next(); Assert.AreEqual(1, tokenizer.ObjectDepth); @@ -275,7 +275,7 @@ namespace Google.Protobuf // Note: we don't test that the earlier tokens are exactly as expected, // partly because that's hard to parameterize. var reader = new StringReader(json.Replace('\'', '"')); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedValidTokens; i++) { Assert.IsNotNull(tokenizer.Next()); @@ -334,7 +334,7 @@ namespace Google.Protobuf [Test] public void NextAfterEndDocumentThrows() { - var tokenizer = new JsonTokenizer(new StringReader("null")); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null")); Assert.AreEqual(JsonToken.Null, tokenizer.Next()); Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); Assert.Throws<InvalidOperationException>(() => tokenizer.Next()); @@ -343,7 +343,7 @@ namespace Google.Protobuf [Test] public void CanPushBackEndDocument() { - var tokenizer = new JsonTokenizer(new StringReader("null")); + var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null")); Assert.AreEqual(JsonToken.Null, tokenizer.Next()); Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next()); tokenizer.PushBack(JsonToken.EndDocument); @@ -373,7 +373,7 @@ namespace Google.Protobuf private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens) { var reader = new StringReader(json); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedTokens.Length; i++) { var actualToken = 
tokenizer.Next(); @@ -393,7 +393,7 @@ namespace Google.Protobuf private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens) { var reader = new StringReader(json); - var tokenizer = new JsonTokenizer(reader); + var tokenizer = JsonTokenizer.FromTextReader(reader); for (int i = 0; i < expectedTokens.Length; i++) { var actualToken = tokenizer.Next(); diff --git a/csharp/src/Google.Protobuf/JsonFormatter.cs b/csharp/src/Google.Protobuf/JsonFormatter.cs index c7d392cd..45941b39 100644 --- a/csharp/src/Google.Protobuf/JsonFormatter.cs +++ b/csharp/src/Google.Protobuf/JsonFormatter.cs @@ -808,12 +808,17 @@ namespace Google.Protobuf /// </summary> public sealed class Settings { - private static readonly Settings defaultInstance = new Settings(false); - /// <summary> /// Default settings, as used by <see cref="JsonFormatter.Default"/> /// </summary> - public static Settings Default { get; } = new Settings(false); + public static Settings Default { get; } + + // Workaround for the Mono compiler complaining about XML comments not being on + // valid language elements. + static Settings() + { + Default = new Settings(false); + } /// <summary> /// Whether fields whose values are the default for the field type (e.g. 
0 for integers) diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs index 2019029b..95f9ad35 100644 --- a/csharp/src/Google.Protobuf/JsonParser.cs +++ b/csharp/src/Google.Protobuf/JsonParser.cs @@ -77,6 +77,7 @@ namespace Google.Protobuf { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, + { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) }, { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, { Int32Value.Descriptor.FullName, MergeWrapperField }, { Int64Value.Descriptor.FullName, MergeWrapperField }, @@ -128,7 +129,7 @@ namespace Google.Protobuf /// <param name="jsonReader">Reader providing the JSON to parse.</param> internal void Merge(IMessage message, TextReader jsonReader) { - var tokenizer = new JsonTokenizer(jsonReader); + var tokenizer = JsonTokenizer.FromTextReader(jsonReader); Merge(message, tokenizer); var lastToken = tokenizer.Next(); if (lastToken != JsonToken.EndDocument) @@ -338,6 +339,7 @@ namespace Google.Protobuf /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> public T Parse<T>(string json) where T : IMessage, new() { + Preconditions.CheckNotNull(json, nameof(json)); return Parse<T>(new StringReader(json)); } @@ -350,11 +352,42 @@ namespace Google.Protobuf /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> public T Parse<T>(TextReader jsonReader) where T : IMessage, new() { + Preconditions.CheckNotNull(jsonReader, nameof(jsonReader)); T message = new T(); Merge(message, jsonReader); return message; } + /// <summary> + /// 
Parses <paramref name="json"/> into a new message. + /// </summary> + /// <param name="json">The JSON to parse.</param> + /// <param name="descriptor">Descriptor of message type to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public IMessage Parse(string json, MessageDescriptor descriptor) + { + Preconditions.CheckNotNull(json, nameof(json)); + Preconditions.CheckNotNull(descriptor, nameof(descriptor)); + return Parse(new StringReader(json), descriptor); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> into a new message. + /// </summary> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + /// <param name="descriptor">Descriptor of message type to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor) + { + Preconditions.CheckNotNull(jsonReader, nameof(jsonReader)); + Preconditions.CheckNotNull(descriptor, nameof(descriptor)); + IMessage message = descriptor.Parser.CreateTemplate(); + Merge(message, jsonReader); + return message; + } + private void MergeStructValue(IMessage message, JsonTokenizer tokenizer) { var firstToken = tokenizer.Next(); @@ -410,6 +443,83 @@ namespace Google.Protobuf MergeMapField(message, field, tokenizer); } + private void MergeAny(IMessage message, JsonTokenizer tokenizer) + { + // Record the token stream until we see the @type property. At that point, we can take the value, consult + // the type registry for the relevant message, and replay the stream, omitting the @type property. 
+ var tokens = new List<JsonToken>(); + + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Any"); + } + int typeUrlObjectDepth = tokenizer.ObjectDepth; + + // The check for the property depth protects us from nested Any values which occur before the type URL + // for *this* Any. + while (token.Type != JsonToken.TokenType.Name || + token.StringValue != JsonFormatter.AnyTypeUrlField || + tokenizer.ObjectDepth != typeUrlObjectDepth) + { + tokens.Add(token); + token = tokenizer.Next(); + } + + // Don't add the @type property or its value to the recorded token list + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Any.@type"); + } + string typeUrl = token.StringValue; + string typeName = JsonFormatter.GetTypeName(typeUrl); + + MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName); + if (descriptor == null) + { + throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'"); + } + + // Now replay the token stream we've already read and anything that remains of the object, just parsing it + // as normal. Our original tokenizer should end up at the end of the object. + var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer); + var body = descriptor.Parser.CreateTemplate(); + if (descriptor.IsWellKnownType) + { + MergeWellKnownTypeAnyBody(body, replay); + } + else + { + Merge(body, replay); + } + var data = body.ToByteString(); + + // Now that we have the message data, we can pack it into an Any (the message received as a parameter). + message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl); + message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data); + } + + // Well-known types end up in a property called "value" in the JSON. 
As there's no longer a @type property + // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value + // itself, and then end-object. + private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); // Definitely start-object; checked in previous method + token = tokenizer.Next(); + // TODO: What about an absent Int32Value, for example? + if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField) + { + throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body"); + } + Merge(body, tokenizer); + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.EndObject) + { + throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type"); + } + } + #region Utility methods which don't depend on the state (or settings) of the parser. private static object ParseMapKey(FieldDescriptor field, string keyText) { @@ -789,29 +899,48 @@ namespace Google.Protobuf /// </summary> public sealed class Settings { - private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit); - - private readonly int recursionLimit; - /// <summary> - /// Default settings, as used by <see cref="JsonParser.Default"/> + /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default + /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. /// </summary> - public static Settings Default { get { return defaultInstance; } } + public static Settings Default { get; } + + // Workaround for the Mono compiler complaining about XML comments not being on + // valid language elements. + static Settings() + { + Default = new Settings(CodedInputStream.DefaultRecursionLimit); + } /// <summary> /// The maximum depth of messages to parse. 
Note that this limit only applies to parsing /// messages, not collections - so a message within a collection within a message only counts as /// depth 2, not 3. /// </summary> - public int RecursionLimit { get { return recursionLimit; } } + public int RecursionLimit { get; } + + /// <summary> + /// The type registry used to parse <see cref="Any"/> messages. + /// </summary> + public TypeRegistry TypeRegistry { get; } /// <summary> /// Creates a new <see cref="Settings"/> object with the specified recursion limit. /// </summary> /// <param name="recursionLimit">The maximum depth of messages to parse</param> - public Settings(int recursionLimit) + public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) + { + } + + /// <summary> + /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. + /// </summary> + /// <param name="recursionLimit">The maximum depth of messages to parse</param> + /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> + public Settings(int recursionLimit, TypeRegistry typeRegistry) { - this.recursionLimit = recursionLimit; + RecursionLimit = recursionLimit; + TypeRegistry = Preconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); } } } diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs index 6589427a..09a6d43b 100644 --- a/csharp/src/Google.Protobuf/JsonTokenizer.cs +++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs @@ -47,32 +47,38 @@ namespace Google.Protobuf /// between values. It validates the token stream as it goes - so callers can assume that the /// tokens it produces are appropriate. For example, it would never produce "start object, end array." 
/// </para> + /// <para>Implementation details: the base class handles single token push-back and </para> /// <para>Not thread-safe.</para> /// </remarks> - internal sealed class JsonTokenizer + internal abstract class JsonTokenizer { - // The set of states in which a value is valid next token. - private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; - - private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>(); - private readonly PushBackReader reader; private JsonToken bufferedToken; - private State state; - private int objectDepth = 0; /// <summary> - /// Returns the depth of the stack, purely in objects (not collections). - /// Informally, this is the number of remaining unclosed '{' characters we have. + /// Creates a tokenizer that reads from the given text reader. /// </summary> - internal int ObjectDepth { get { return objectDepth; } } + internal static JsonTokenizer FromTextReader(TextReader reader) + { + return new JsonTextTokenizer(reader); + } - internal JsonTokenizer(TextReader reader) + /// <summary> + /// Creates a tokenizer that first replays the given list of tokens, then continues reading + /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back + /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was + /// created for the sake of Any parsing. + /// </summary> + internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation) { - this.reader = new PushBackReader(reader); - state = State.StartOfDocument; - containerStack.Push(ContainerType.Document); + return new JsonReplayTokenizer(tokens, continuation); } + /// <summary> + /// Returns the depth of the stack, purely in objects (not collections). + /// Informally, this is the number of remaining unclosed '{' characters we have. 
+ /// </summary> + internal int ObjectDepth { get; private set; } + // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack). internal void PushBack(JsonToken token) @@ -84,11 +90,11 @@ namespace Google.Protobuf bufferedToken = token; if (token.Type == JsonToken.TokenType.StartObject) { - objectDepth--; + ObjectDepth--; } else if (token.Type == JsonToken.TokenType.EndObject) { - objectDepth++; + ObjectDepth++; } } @@ -96,574 +102,636 @@ namespace Google.Protobuf /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream, /// after which point <c>Next()</c> should not be called again. /// </summary> - /// <remarks> - /// This method essentially just loops through characters skipping whitespace, validating and - /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) - /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point - /// it returns the token. Although the method is large, it would be relatively hard to break down further... most - /// of it is the large switch statement, which sometimes returns and sometimes doesn't. - /// </remarks> + /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks> /// <returns>The next token in the stream. 
This is never null.</returns> /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception> /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception> internal JsonToken Next() { + JsonToken tokenToReturn; if (bufferedToken != null) { - var ret = bufferedToken; + tokenToReturn = bufferedToken; bufferedToken = null; - if (ret.Type == JsonToken.TokenType.StartObject) - { - objectDepth++; - } - else if (ret.Type == JsonToken.TokenType.EndObject) - { - objectDepth--; - } - return ret; } - if (state == State.ReaderExhausted) + else { - throw new InvalidOperationException("Next() called after end of document"); + tokenToReturn = NextImpl(); } - while (true) + if (tokenToReturn.Type == JsonToken.TokenType.StartObject) { - var next = reader.Read(); - if (next == null) - { - ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); - state = State.ReaderExhausted; - return JsonToken.EndDocument; - } - switch (next.Value) - { - // Skip whitespace between tokens - case ' ': - case '\t': - case '\r': - case '\n': - break; - case ':': - ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); - state = State.ObjectAfterColon; - break; - case ',': - ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); - state = state == State.ObjectAfterProperty ? 
State.ObjectAfterComma : State.ArrayAfterComma; - break; - case '"': - string stringValue = ReadString(); - if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) - { - state = State.ObjectBeforeColon; - return JsonToken.Name(stringValue); - } - else - { - ValidateAndModifyStateForValue("Invalid state to read a double quote: "); - return JsonToken.Value(stringValue); - } - case '{': - ValidateState(ValueStates, "Invalid state to read an open brace: "); - state = State.ObjectStart; - containerStack.Push(ContainerType.Object); - objectDepth++; - return JsonToken.StartObject; - case '}': - ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); - PopContainer(); - objectDepth--; - return JsonToken.EndObject; - case '[': - ValidateState(ValueStates, "Invalid state to read an open square bracket: "); - state = State.ArrayStart; - containerStack.Push(ContainerType.Array); - return JsonToken.StartArray; - case ']': - ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); - PopContainer(); - return JsonToken.EndArray; - case 'n': // Start of null - ConsumeLiteral("null"); - ValidateAndModifyStateForValue("Invalid state to read a null literal: "); - return JsonToken.Null; - case 't': // Start of true - ConsumeLiteral("true"); - ValidateAndModifyStateForValue("Invalid state to read a true literal: "); - return JsonToken.True; - case 'f': // Start of false - ConsumeLiteral("false"); - ValidateAndModifyStateForValue("Invalid state to read a false literal: "); - return JsonToken.False; - case '-': // Start of a number - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - double number = ReadNumber(next.Value); - ValidateAndModifyStateForValue("Invalid state to read a number token: "); - return JsonToken.Value(number); - default: - throw new InvalidJsonException("Invalid first character of token: " + 
next.Value); - } + ObjectDepth++; } - } - - private void ValidateState(State validStates, string errorPrefix) - { - if ((validStates & state) == 0) + else if (tokenToReturn.Type == JsonToken.TokenType.EndObject) { - throw reader.CreateException(errorPrefix + state); + ObjectDepth--; } + return tokenToReturn; } /// <summary> - /// Reads a string token. It is assumed that the opening " has already been read. + /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates + /// to this if it doesn't have a buffered token.) /// </summary> - private string ReadString() - { - var value = new StringBuilder(); - bool haveHighSurrogate = false; - while (true) - { - char c = reader.ReadOrFail("Unexpected end of text while reading string"); - if (c < ' ') - { - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); - } - if (c == '"') - { - if (haveHighSurrogate) - { - throw reader.CreateException("Invalid use of surrogate pair code units"); - } - return value.ToString(); - } - if (c == '\\') - { - c = ReadEscapedCharacter(); - } - // TODO: Consider only allowing surrogate pairs that are either both escaped, - // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate - // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. - if (haveHighSurrogate != char.IsLowSurrogate(c)) - { - throw reader.CreateException("Invalid use of surrogate pair code units"); - } - haveHighSurrogate = char.IsHighSurrogate(c); - value.Append(c); - } - } + /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception> + /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception> + protected abstract JsonToken NextImpl(); /// <summary> - /// Reads an escaped character. 
It is assumed that the leading backslash has already been read. + /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer. /// </summary> - private char ReadEscapedCharacter() + private class JsonReplayTokenizer : JsonTokenizer { - char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); - switch (c) + private readonly IList<JsonToken> tokens; + private readonly JsonTokenizer nextTokenizer; + private int nextTokenIndex; + + internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer) { - case 'n': - return '\n'; - case '\\': - return '\\'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'r': - return '\r'; - case 't': - return '\t'; - case '"': - return '"'; - case '/': - return '/'; - case 'u': - return ReadUnicodeEscape(); - default: - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + this.tokens = tokens; + this.nextTokenizer = nextTokenizer; } - } - /// <summary> - /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. - /// </summary> - private char ReadUnicodeEscape() - { - int result = 0; - for (int i = 0; i < 4; i++) + // FIXME: Object depth not maintained... 
+ protected override JsonToken NextImpl() { - char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); - int nybble; - if (c >= '0' && c <= '9') + if (nextTokenIndex >= tokens.Count) { - nybble = c - '0'; + return nextTokenizer.Next(); } - else if (c >= 'a' && c <= 'f') - { - nybble = c - 'a' + 10; - } - else if (c >= 'A' && c <= 'F') - { - nybble = c - 'A' + 10; - } - else - { - throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); - } - result = (result << 4) + nybble; + return tokens[nextTokenIndex++]; } - return (char) result; } /// <summary> - /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. - /// It is assumed that the first letter of the literal has already been read. + /// Tokenizer which does all the *real* work of parsing JSON. /// </summary> - private void ConsumeLiteral(string text) + private sealed class JsonTextTokenizer : JsonTokenizer { - for (int i = 1; i < text.Length; i++) + // The set of states in which a value is valid next token. + private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; + + private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>(); + private readonly PushBackReader reader; + private State state; + + internal JsonTextTokenizer(TextReader reader) { - char? next = reader.Read(); - if (next == null) + this.reader = new PushBackReader(reader); + state = State.StartOfDocument; + containerStack.Push(ContainerType.Document); + } + + /// <remarks> + /// This method essentially just loops through characters skipping whitespace, validating and + /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) + /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point + /// it returns the token. 
Although the method is large, it would be relatively hard to break down further... most + /// of it is the large switch statement, which sometimes returns and sometimes doesn't. + /// </remarks> + protected override JsonToken NextImpl() + { + if (state == State.ReaderExhausted) { - throw reader.CreateException("Unexpected end of text while reading literal token " + text); + throw new InvalidOperationException("Next() called after end of document"); } - if (next.Value != text[i]) + while (true) { - throw reader.CreateException("Unexpected character while reading literal token " + text); + var next = reader.Read(); + if (next == null) + { + ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); + state = State.ReaderExhausted; + return JsonToken.EndDocument; + } + switch (next.Value) + { + // Skip whitespace between tokens + case ' ': + case '\t': + case '\r': + case '\n': + break; + case ':': + ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); + state = State.ObjectAfterColon; + break; + case ',': + ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); + state = state == State.ObjectAfterProperty ? 
State.ObjectAfterComma : State.ArrayAfterComma; + break; + case '"': + string stringValue = ReadString(); + if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) + { + state = State.ObjectBeforeColon; + return JsonToken.Name(stringValue); + } + else + { + ValidateAndModifyStateForValue("Invalid state to read a double quote: "); + return JsonToken.Value(stringValue); + } + case '{': + ValidateState(ValueStates, "Invalid state to read an open brace: "); + state = State.ObjectStart; + containerStack.Push(ContainerType.Object); + return JsonToken.StartObject; + case '}': + ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); + PopContainer(); + return JsonToken.EndObject; + case '[': + ValidateState(ValueStates, "Invalid state to read an open square bracket: "); + state = State.ArrayStart; + containerStack.Push(ContainerType.Array); + return JsonToken.StartArray; + case ']': + ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); + PopContainer(); + return JsonToken.EndArray; + case 'n': // Start of null + ConsumeLiteral("null"); + ValidateAndModifyStateForValue("Invalid state to read a null literal: "); + return JsonToken.Null; + case 't': // Start of true + ConsumeLiteral("true"); + ValidateAndModifyStateForValue("Invalid state to read a true literal: "); + return JsonToken.True; + case 'f': // Start of false + ConsumeLiteral("false"); + ValidateAndModifyStateForValue("Invalid state to read a false literal: "); + return JsonToken.False; + case '-': // Start of a number + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + double number = ReadNumber(next.Value); + ValidateAndModifyStateForValue("Invalid state to read a number token: "); + return JsonToken.Value(number); + default: + throw new InvalidJsonException("Invalid first character of token: " + next.Value); + } } } - } - private 
double ReadNumber(char initialCharacter) - { - StringBuilder builder = new StringBuilder(); - if (initialCharacter == '-') - { - builder.Append("-"); - } - else - { - reader.PushBack(initialCharacter); - } - // Each method returns the character it read that doesn't belong in that part, - // so we know what to do next, including pushing the character back at the end. - // null is returned for "end of text". - char? next = ReadInt(builder); - if (next == '.') + private void ValidateState(State validStates, string errorPrefix) { - next = ReadFrac(builder); - } - if (next == 'e' || next == 'E') - { - next = ReadExp(builder); - } - // If we read a character which wasn't part of the number, push it back so we can read it again - // to parse the next token. - if (next != null) - { - reader.PushBack(next.Value); + if ((validStates & state) == 0) + { + throw reader.CreateException(errorPrefix + state); + } } - // TODO: What exception should we throw if the value can't be represented as a double? - try - { - return double.Parse(builder.ToString(), - NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, - CultureInfo.InvariantCulture); - } - catch (OverflowException) + /// <summary> + /// Reads a string token. It is assumed that the opening " has already been read. 
+ /// </summary> + private string ReadString() { - throw reader.CreateException("Numeric value out of range: " + builder); + var value = new StringBuilder(); + bool haveHighSurrogate = false; + while (true) + { + char c = reader.ReadOrFail("Unexpected end of text while reading string"); + if (c < ' ') + { + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); + } + if (c == '"') + { + if (haveHighSurrogate) + { + throw reader.CreateException("Invalid use of surrogate pair code units"); + } + return value.ToString(); + } + if (c == '\\') + { + c = ReadEscapedCharacter(); + } + // TODO: Consider only allowing surrogate pairs that are either both escaped, + // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate + // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. + if (haveHighSurrogate != char.IsLowSurrogate(c)) + { + throw reader.CreateException("Invalid use of surrogate pair code units"); + } + haveHighSurrogate = char.IsHighSurrogate(c); + value.Append(c); + } } - } - private char? ReadInt(StringBuilder builder) - { - char first = reader.ReadOrFail("Invalid numeric literal"); - if (first < '0' || first > '9') - { - throw reader.CreateException("Invalid numeric literal"); - } - builder.Append(first); - int digitCount; - char? next = ConsumeDigits(builder, out digitCount); - if (first == '0' && digitCount != 0) + /// <summary> + /// Reads an escaped character. It is assumed that the leading backslash has already been read. 
+ /// </summary> + private char ReadEscapedCharacter() { - throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); + char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); + switch (c) + { + case 'n': + return '\n'; + case '\\': + return '\\'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 't': + return '\t'; + case '"': + return '"'; + case '/': + return '/'; + case 'u': + return ReadUnicodeEscape(); + default: + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } } - return next; - } - private char? ReadFrac(StringBuilder builder) - { - builder.Append('.'); // Already consumed this - int digitCount; - char? next = ConsumeDigits(builder, out digitCount); - if (digitCount == 0) + /// <summary> + /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. + /// </summary> + private char ReadUnicodeEscape() { - throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); + int result = 0; + for (int i = 0; i < 4; i++) + { + char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); + int nybble; + if (c >= '0' && c <= '9') + { + nybble = c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + nybble = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + nybble = c - 'A' + 10; + } + else + { + throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + result = (result << 4) + nybble; + } + return (char) result; } - return next; - } - private char? ReadExp(StringBuilder builder) - { - builder.Append('E'); // Already consumed this (or 'e') - char? 
next = reader.Read(); - if (next == null) + /// <summary> + /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. + /// It is assumed that the first letter of the literal has already been read. + /// </summary> + private void ConsumeLiteral(string text) { - throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); + for (int i = 1; i < text.Length; i++) + { + char? next = reader.Read(); + if (next == null) + { + throw reader.CreateException("Unexpected end of text while reading literal token " + text); + } + if (next.Value != text[i]) + { + throw reader.CreateException("Unexpected character while reading literal token " + text); + } + } } - if (next == '-' || next == '+') + + private double ReadNumber(char initialCharacter) { - builder.Append(next.Value); + StringBuilder builder = new StringBuilder(); + if (initialCharacter == '-') + { + builder.Append("-"); + } + else + { + reader.PushBack(initialCharacter); + } + // Each method returns the character it read that doesn't belong in that part, + // so we know what to do next, including pushing the character back at the end. + // null is returned for "end of text". + char? next = ReadInt(builder); + if (next == '.') + { + next = ReadFrac(builder); + } + if (next == 'e' || next == 'E') + { + next = ReadExp(builder); + } + // If we read a character which wasn't part of the number, push it back so we can read it again + // to parse the next token. + if (next != null) + { + reader.PushBack(next.Value); + } + + // TODO: What exception should we throw if the value can't be represented as a double? + try + { + return double.Parse(builder.ToString(), + NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, + CultureInfo.InvariantCulture); + } + catch (OverflowException) + { + throw reader.CreateException("Numeric value out of range: " + builder); + } } - else + + private char? 
ReadInt(StringBuilder builder) { - reader.PushBack(next.Value); + char first = reader.ReadOrFail("Invalid numeric literal"); + if (first < '0' || first > '9') + { + throw reader.CreateException("Invalid numeric literal"); + } + builder.Append(first); + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (first == '0' && digitCount != 0) + { + throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value."); + } + return next; } - int digitCount; - next = ConsumeDigits(builder, out digitCount); - if (digitCount == 0) + + private char? ReadFrac(StringBuilder builder) { - throw reader.CreateException("Invalid numeric literal: exponent without value"); + builder.Append('.'); // Already consumed this + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits"); + } + return next; } - return next; - } - private char? ConsumeDigits(StringBuilder builder, out int count) - { - count = 0; - while (true) + private char? ReadExp(StringBuilder builder) { + builder.Append('E'); // Already consumed this (or 'e') char? next = reader.Read(); - if (next == null || next.Value < '0' || next.Value > '9') + if (next == null) + { + throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits"); + } + if (next == '-' || next == '+') { - return next; + builder.Append(next.Value); } - count++; - builder.Append(next.Value); + else + { + reader.PushBack(next.Value); + } + int digitCount; + next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw reader.CreateException("Invalid numeric literal: exponent without value"); + } + return next; } - } - /// <summary> - /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) - /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. 
- /// </summary> - private void ValidateAndModifyStateForValue(string errorPrefix) - { - ValidateState(ValueStates, errorPrefix); - switch (state) + private char? ConsumeDigits(StringBuilder builder, out int count) { - case State.StartOfDocument: - state = State.ExpectedEndOfDocument; - return; - case State.ObjectAfterColon: - state = State.ObjectAfterProperty; - return; - case State.ArrayStart: - case State.ArrayAfterComma: - state = State.ArrayAfterValue; - return; - default: - throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + count = 0; + while (true) + { + char? next = reader.Read(); + if (next == null || next.Value < '0' || next.Value > '9') + { + return next; + } + count++; + builder.Append(next.Value); + } } - } - /// <summary> - /// Pops the top-most container, and sets the state to the appropriate one for the end of a value - /// in the parent container. - /// </summary> - private void PopContainer() - { - containerStack.Pop(); - var parent = containerStack.Peek(); - switch (parent) + /// <summary> + /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) + /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. 
+ /// </summary> + private void ValidateAndModifyStateForValue(string errorPrefix) { - case ContainerType.Object: - state = State.ObjectAfterProperty; - break; - case ContainerType.Array: - state = State.ArrayAfterValue; - break; - case ContainerType.Document: - state = State.ExpectedEndOfDocument; - break; - default: - throw new InvalidOperationException("Unexpected container type: " + parent); + ValidateState(ValueStates, errorPrefix); + switch (state) + { + case State.StartOfDocument: + state = State.ExpectedEndOfDocument; + return; + case State.ObjectAfterColon: + state = State.ObjectAfterProperty; + return; + case State.ArrayStart: + case State.ArrayAfterComma: + state = State.ArrayAfterValue; + return; + default: + throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + } } - } - private enum ContainerType - { - Document, Object, Array - } - - /// <summary> - /// Possible states of the tokenizer. - /// </summary> - /// <remarks> - /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states - /// for checking.</para> - /// <para> - /// Each is documented with an example, - /// where ^ represents the current position within the text stream. The examples all use string values, - /// but could be any value, including nested objects/arrays. - /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). - /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which - /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. - /// </para> - /// <para> - /// These states were derived manually by reading RFC 7159 carefully. - /// </para> - /// </remarks> - [Flags] - private enum State - { - /// <summary> - /// ^ { "foo": "bar" } - /// Before the value in a document. 
Next states: ObjectStart, ArrayStart, "AfterValue" - /// </summary> - StartOfDocument = 1 << 0, - /// <summary> - /// { "foo": "bar" } ^ - /// After the value in a document. Next states: ReaderExhausted - /// </summary> - ExpectedEndOfDocument = 1 << 1, - /// <summary> - /// { "foo": "bar" } ^ (and already read to the end of the reader) - /// Terminal state. - /// </summary> - ReaderExhausted = 1 << 2, - /// <summary> - /// { ^ "foo": "bar" } - /// Before the *first* property in an object. - /// Next states: - /// "AfterValue" (empty object) - /// ObjectBeforeColon (read a name) - /// </summary> - ObjectStart = 1 << 3, - /// <summary> - /// { "foo" ^ : "bar", "x": "y" } - /// Next state: ObjectAfterColon - /// </summary> - ObjectBeforeColon = 1 << 4, - /// <summary> - /// { "foo" : ^ "bar", "x": "y" } - /// Before any property other than the first in an object. - /// (Equivalently: after any property in an object) - /// Next states: - /// "AfterValue" (value is simple) - /// ObjectStart (value is object) - /// ArrayStart (value is array) - /// </summary> - ObjectAfterColon = 1 << 5, - /// <summary> - /// { "foo" : "bar" ^ , "x" : "y" } - /// At the end of a property, so expecting either a comma or end-of-object - /// Next states: ObjectAfterComma or "AfterValue" - /// </summary> - ObjectAfterProperty = 1 << 6, - /// <summary> - /// { "foo":"bar", ^ "x":"y" } - /// Read the comma after the previous property, so expecting another property. - /// This is like ObjectStart, but closing brace isn't valid here - /// Next state: ObjectBeforeColon. - /// </summary> - ObjectAfterComma = 1 << 7, - /// <summary> - /// [ ^ "foo", "bar" ] - /// Before the *first* value in an array. 
- /// Next states: - /// "AfterValue" (read a value) - /// "AfterValue" (end of array; will pop stack) - /// </summary> - ArrayStart = 1 << 8, - /// <summary> - /// [ "foo" ^ , "bar" ] - /// After any value in an array, so expecting either a comma or end-of-array - /// Next states: ArrayAfterComma or "AfterValue" - /// </summary> - ArrayAfterValue = 1 << 9, /// <summary> - /// [ "foo", ^ "bar" ] - /// After a comma in an array, so there *must* be another value (simple or complex). - /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// Pops the top-most container, and sets the state to the appropriate one for the end of a value + /// in the parent container. /// </summary> - ArrayAfterComma = 1 << 10 - } - - /// <summary> - /// Wrapper around a text reader allowing small amounts of buffering and location handling. - /// </summary> - private class PushBackReader - { - // TODO: Add locations for errors etc. - - private readonly TextReader reader; + private void PopContainer() + { + containerStack.Pop(); + var parent = containerStack.Peek(); + switch (parent) + { + case ContainerType.Object: + state = State.ObjectAfterProperty; + break; + case ContainerType.Array: + state = State.ArrayAfterValue; + break; + case ContainerType.Document: + state = State.ExpectedEndOfDocument; + break; + default: + throw new InvalidOperationException("Unexpected container type: " + parent); + } + } - internal PushBackReader(TextReader reader) + private enum ContainerType { - // TODO: Wrap the reader in a BufferedReader? - this.reader = reader; + Document, Object, Array } /// <summary> - /// The buffered next character, if we have one. + /// Possible states of the tokenizer. /// </summary> - private char? 
nextChar; + /// <remarks> + /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states + /// for checking.</para> + /// <para> + /// Each is documented with an example, + /// where ^ represents the current position within the text stream. The examples all use string values, + /// but could be any value, including nested objects/arrays. + /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). + /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which + /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. + /// </para> + /// <para> + /// These states were derived manually by reading RFC 7159 carefully. + /// </para> + /// </remarks> + [Flags] + private enum State + { + /// <summary> + /// ^ { "foo": "bar" } + /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue" + /// </summary> + StartOfDocument = 1 << 0, + /// <summary> + /// { "foo": "bar" } ^ + /// After the value in a document. Next states: ReaderExhausted + /// </summary> + ExpectedEndOfDocument = 1 << 1, + /// <summary> + /// { "foo": "bar" } ^ (and already read to the end of the reader) + /// Terminal state. + /// </summary> + ReaderExhausted = 1 << 2, + /// <summary> + /// { ^ "foo": "bar" } + /// Before the *first* property in an object. + /// Next states: + /// "AfterValue" (empty object) + /// ObjectBeforeColon (read a name) + /// </summary> + ObjectStart = 1 << 3, + /// <summary> + /// { "foo" ^ : "bar", "x": "y" } + /// Next state: ObjectAfterColon + /// </summary> + ObjectBeforeColon = 1 << 4, + /// <summary> + /// { "foo" : ^ "bar", "x": "y" } + /// Before any property other than the first in an object. 
+ /// (Equivalently: after any property in an object) + /// Next states: + /// "AfterValue" (value is simple) + /// ObjectStart (value is object) + /// ArrayStart (value is array) + /// </summary> + ObjectAfterColon = 1 << 5, + /// <summary> + /// { "foo" : "bar" ^ , "x" : "y" } + /// At the end of a property, so expecting either a comma or end-of-object + /// Next states: ObjectAfterComma or "AfterValue" + /// </summary> + ObjectAfterProperty = 1 << 6, + /// <summary> + /// { "foo":"bar", ^ "x":"y" } + /// Read the comma after the previous property, so expecting another property. + /// This is like ObjectStart, but closing brace isn't valid here + /// Next state: ObjectBeforeColon. + /// </summary> + ObjectAfterComma = 1 << 7, + /// <summary> + /// [ ^ "foo", "bar" ] + /// Before the *first* value in an array. + /// Next states: + /// "AfterValue" (read a value) + /// "AfterValue" (end of array; will pop stack) + /// </summary> + ArrayStart = 1 << 8, + /// <summary> + /// [ "foo" ^ , "bar" ] + /// After any value in an array, so expecting either a comma or end-of-array + /// Next states: ArrayAfterComma or "AfterValue" + /// </summary> + ArrayAfterValue = 1 << 9, + /// <summary> + /// [ "foo", ^ "bar" ] + /// After a comma in an array, so there *must* be another value (simple or complex). + /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// </summary> + ArrayAfterComma = 1 << 10 + } /// <summary> - /// Returns the next character in the stream, or null if we have reached the end. + /// Wrapper around a text reader allowing small amounts of buffering and location handling. /// </summary> - /// <returns></returns> - internal char? Read() + private class PushBackReader { - if (nextChar != null) + // TODO: Add locations for errors etc. + + private readonly TextReader reader; + + internal PushBackReader(TextReader reader) { - char? tmp = nextChar; - nextChar = null; - return tmp; + // TODO: Wrap the reader in a BufferedReader? 
+ this.reader = reader; } - int next = reader.Read(); - return next == -1 ? null : (char?) next; - } - internal char ReadOrFail(string messageOnFailure) - { - char? next = Read(); - if (next == null) + /// <summary> + /// The buffered next character, if we have one. + /// </summary> + private char? nextChar; + + /// <summary> + /// Returns the next character in the stream, or null if we have reached the end. + /// </summary> + /// <returns></returns> + internal char? Read() { - throw CreateException(messageOnFailure); + if (nextChar != null) + { + char? tmp = nextChar; + nextChar = null; + return tmp; + } + int next = reader.Read(); + return next == -1 ? null : (char?) next; } - return next.Value; - } - internal void PushBack(char c) - { - if (nextChar != null) + internal char ReadOrFail(string messageOnFailure) { - throw new InvalidOperationException("Cannot push back when already buffering a character"); + char? next = Read(); + if (next == null) + { + throw CreateException(messageOnFailure); + } + return next.Value; } - nextChar = c; - } - /// <summary> - /// Creates a new exception appropriate for the current state of the reader. - /// </summary> - internal InvalidJsonException CreateException(string message) - { - // TODO: Keep track of and use the location. - return new InvalidJsonException(message); + internal void PushBack(char c) + { + if (nextChar != null) + { + throw new InvalidOperationException("Cannot push back when already buffering a character"); + } + nextChar = c; + } + + /// <summary> + /// Creates a new exception appropriate for the current state of the reader. + /// </summary> + internal InvalidJsonException CreateException(string message) + { + // TODO: Keep track of and use the location. + return new InvalidJsonException(message); + } } } } |