Implement JSON parsing in C#.

This includes all the well-known types except Any. Some aspects are likely to require further work when the details of the JSON parsing expectations are hammered out. Some of these have "ignored" tests already.

Note that the choice *not* to use Json.NET was made for two reasons:
- Going from 0 dependencies to 1 dependency is a big hit, and there's not much benefit here.
- Json.NET parses more leniently than we'd want; accommodating that would be nearly as much work as writing the tokenizer.

This only really affects the JsonTokenizer, which could be replaced by Json.NET. The JsonParser code would be about the same length with Json.NET... but I wouldn't be as confident in it.
author: Jon Skeet <jonskeet@google.com> 2015-09-04 12:41:14 +0100
committer: Jon Skeet <jonskeet@google.com> 2015-11-03 19:05:11 +0000
commit: fb2488225fbd239f7880e3b493cbfd2f19da755b (patch)
tree: e51173d483b91a77708dd3bdbffa1577bf89bbec /csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
parent: aa3675415ec8da317793a3e151e44daaebc21ff8 (diff)
1 files changed, 352 insertions, 0 deletions
diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
new file mode 100644
index 00000000..868d9f75
--- /dev/null
+++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
@@ -0,0 +1,352 @@
+#region Copyright notice and license
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Google.Protobuf
+{
+    public class JsonTokenizerTest
+    {
+        // "{}" must tokenize to a start-object token directly followed by an end-object token.
+        [Test]
+        public void EmptyObjectValue()
+        {
+            var expected = new JsonToken[] { JsonToken.StartObject, JsonToken.EndObject };
+            AssertTokens("{}", expected);
+        }
+
+        // "[]" must tokenize to a start-array token directly followed by an end-array token.
+        [Test]
+        public void EmptyArrayValue()
+        {
+            var expected = new JsonToken[] { JsonToken.StartArray, JsonToken.EndArray };
+            AssertTokens("[]", expected);
+        }
+
+        // Each case gives the content of a JSON string (without the surrounding quotes)
+        // and the string value the resulting token is expected to carry.
+        [Test]
+        [TestCase("foo", "foo")]
+        [TestCase("tab\\t", "tab\t")]
+        [TestCase("line\\nfeed", "line\nfeed")]
+        [TestCase("carriage\\rreturn", "carriage\rreturn")]
+        [TestCase("back\\bspace", "back\bspace")]
+        [TestCase("form\\ffeed", "form\ffeed")]
+        [TestCase("escaped\\/slash", "escaped/slash")]
+        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
+        [TestCase("escaped\\\"quote", "escaped\"quote")]
+        [TestCase("foo {}[] bar", "foo {}[] bar")]
+        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
+        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
+        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
+        public void StringValue(string json, string expectedValue)
+        {
+            // Wrap the content in real double quotes; no apostrophe replacement is wanted here.
+            string quotedJson = "\"" + json + "\"";
+            JsonToken expectedToken = JsonToken.Value(expectedValue);
+            AssertTokensNoReplacement(quotedJson, expectedToken);
+        }
+
+        // Valid surrogate pairs where only one half is written as a \u escape.
+        // These inputs can't be TestCase arguments because they have no valid UTF-8
+        // representation. How such a mixture ought to be handled is unclear: it can't
+        // come from UTF-8 text, but it could come from a .NET string. For the moment
+        // it is accepted in the obvious way.
+        [Test]
+        public void MixedSurrogatePairs()
+        {
+            JsonToken expectedToken = JsonToken.Value("\ud800\udc00");
+            // High surrogate escaped, low surrogate raw.
+            AssertTokens("'\\ud800\udc00'", expectedToken);
+            // High surrogate raw, low surrogate escaped.
+            AssertTokens("'\ud800\\udc00'", expectedToken);
+        }
+
+        // Each case is the content of a JSON string (without the surrounding quotes)
+        // that the tokenizer is expected to reject.
+        [Test]
+        [TestCase("embedded tab\t")]
+        [TestCase("embedded CR\r")]
+        [TestCase("embedded LF\n")]
+        [TestCase("embedded bell\u0007")]
+        [TestCase("bad escape\\a")]
+        [TestCase("incomplete escape\\")]
+        [TestCase("incomplete Unicode escape\\u000")]
+        [TestCase("invalid Unicode escape\\u000H")]
+        // Surrogate pair handling, both in raw .NET strings and escaped. We only need
+        // to detect this in strings, as non-ASCII characters anywhere other than in strings
+        // will already lead to parsing errors.
+        [TestCase("\\ud800")]
+        [TestCase("\\udc00")]
+        [TestCase("\\ud800x")]
+        [TestCase("\\udc00x")]
+        [TestCase("\\udc00\\ud800y")]
+        public void InvalidStringValue(string json)
+        {
+            // Surround the content with double quotes to form the string literal under test.
+            string quotedJson = "\"" + json + "\"";
+            AssertThrowsAfter(quotedJson);
+        }
+
+        // Invalid strings containing raw (unescaped) surrogates. These can't be supplied
+        // through attributes, as the constants can't be expressed as UTF-8 strings.
+        [Test]
+        public void InvalidSurrogatePairs()
+        {
+            string[] invalidDocuments =
+            {
+                "\"\ud800x\"",
+                "\"\udc00y\"",
+                "\"\udc00\ud800y\""
+            };
+            foreach (string document in invalidDocuments)
+            {
+                AssertThrowsAfter(document);
+            }
+        }
+
+        // Each case gives JSON number text and the double value the token should carry.
+        [Test]
+        [TestCase("0", 0)]
+        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
+        [TestCase("1", 1)]
+        [TestCase("-1", -1)]
+        // From here on, assume leading sign is okay...
+        [TestCase("1.125", 1.125)]
+        [TestCase("1.0", 1)]
+        [TestCase("1e5", 100000)]
+        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
+        [TestCase("1E5", 100000)]
+        [TestCase("1e+5", 100000)]
+        [TestCase("1E-5", 0.00001)]
+        [TestCase("123E-2", 1.23)]
+        [TestCase("123.45E3", 123450)]
+        [TestCase("   1   ", 1)]
+        public void NumberValue(string json, double expectedValue)
+        {
+            JsonToken expectedToken = JsonToken.Value(expectedValue);
+            AssertTokens(json, expectedToken);
+        }
+
+        // Each case is number-like text that the tokenizer is expected to reject.
+        [Test]
+        [TestCase("00")]
+        [TestCase(".5")]
+        [TestCase("1.")]
+        [TestCase("1e")]
+        [TestCase("1e-")]
+        [TestCase("--")]
+        [TestCase("--1")]
+        [TestCase("-1.7977e308")]
+        [TestCase("1.7977e308")]
+        public void InvalidNumberValue(string json)
+        {
+            // No tokens are expected before the failure, so the very first read must throw.
+            var noLeadingTokens = new JsonToken[0];
+            AssertThrowsAfter(json, noLeadingTokens);
+        }
+
+        // Each case is a bare word that is not exactly one of the JSON literals
+        // and is expected to be rejected.
+        [Test]
+        [TestCase("nul")]
+        [TestCase("nothing")]
+        [TestCase("truth")]
+        [TestCase("fALSEhood")]
+        public void InvalidLiterals(string json)
+        {
+            // The failure is expected on the first read, with no tokens before it.
+            var noLeadingTokens = new JsonToken[] { };
+            AssertThrowsAfter(json, noLeadingTokens);
+        }
+
+        // The bare literal "null" must produce exactly one token: JsonToken.Null.
+        [Test]
+        public void NullValue()
+        {
+            const string json = "null";
+            AssertTokens(json, JsonToken.Null);
+        }
+
+        // The bare literal "true" must produce exactly one token: JsonToken.True.
+        [Test]
+        public void TrueValue()
+        {
+            const string json = "true";
+            AssertTokens(json, JsonToken.True);
+        }
+
+        // The bare literal "false" must produce exactly one token: JsonToken.False.
+        [Test]
+        public void FalseValue()
+        {
+            const string json = "false";
+            AssertTokens(json, JsonToken.False);
+        }
+
+        // A single-property object yields: start object, property name, value, end object.
+        [Test]
+        public void SimpleObject()
+        {
+            JsonToken[] expected =
+            {
+                JsonToken.StartObject,
+                JsonToken.Name("x"),
+                JsonToken.Value("y"),
+                JsonToken.EndObject
+            };
+            AssertTokens("{'x': 'y'}", expected);
+        }
+        
+        // Each case pairs structurally invalid JSON with the number of tokens that can be
+        // read successfully before the tokenizer is expected to reject the input.
+        // Apostrophes in the test data stand in for double quotes.
+        [Test]
+        [TestCase("[10, 20", 3)]
+        [TestCase("[10,", 2)]
+        [TestCase("[10:20]", 2)]
+        [TestCase("[", 1)]
+        [TestCase("[,", 1)]
+        [TestCase("{", 1)]
+        [TestCase("{,", 1)]
+        [TestCase("{[", 1)]
+        [TestCase("{{", 1)]
+        [TestCase("{0", 1)]
+        [TestCase("{null", 1)]
+        [TestCase("{false", 1)]
+        [TestCase("{true", 1)]
+        [TestCase("}", 0)]
+        [TestCase("]", 0)]
+        [TestCase(",", 0)]
+        [TestCase("'foo' 'bar'", 1)]
+        [TestCase(":", 0)]
+        [TestCase("'foo", 0)] // Incomplete string
+        [TestCase("{ 'foo' }", 2)]
+        [TestCase("{ x:1", 1)] // Property names must be quoted
+        [TestCase("{]", 1)]
+        [TestCase("[}", 1)]
+        [TestCase("[1,", 2)]
+        [TestCase("{'x':0]", 3)]
+        [TestCase("{ 'foo': }", 2)]
+        [TestCase("{ 'foo':'bar', }", 3)]
+        public void InvalidStructure(string json, int expectedValidTokens)
+        {
+            // Note: we don't test that the earlier tokens are exactly as expected,
+            // partly because that's hard to parameterize.
+            var reader = new StringReader(json.Replace('\'', '"'));
+            var tokenizer = new JsonTokenizer(reader);
+            // Consume the tokens that precede the structural error...
+            for (int i = 0; i < expectedValidTokens; i++)
+            {
+                Assert.IsNotNull(tokenizer.Next());
+            }
+            // ... then the next read must fail.
+            Assert.Throws<InvalidProtocolBufferException>(() => tokenizer.Next());
+        }
+
+        // An array holding every kind of value (number, string, null, booleans,
+        // nested array, nested object) must tokenize element by element, in order.
+        [Test]
+        public void ArrayMixedType()
+        {
+            JsonToken[] expected =
+            {
+                JsonToken.StartArray,
+                JsonToken.Value(1),
+                JsonToken.Value("foo"),
+                JsonToken.Null,
+                JsonToken.False,
+                JsonToken.True,
+                // Nested array: [2]
+                JsonToken.StartArray, JsonToken.Value(2), JsonToken.EndArray,
+                // Nested object: {'x':'y' }
+                JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject,
+                JsonToken.EndArray
+            };
+            AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]", expected);
+        }
+
+        // An object whose properties cover every kind of value (number, string, null,
+        // booleans, nested array, nested object) must tokenize as name/value pairs, in order.
+        [Test]
+        public void ObjectMixedType()
+        {
+            JsonToken[] expected =
+            {
+                JsonToken.StartObject,
+                JsonToken.Name("a"), JsonToken.Value(1),
+                JsonToken.Name("b"), JsonToken.Value("bar"),
+                JsonToken.Name("c"), JsonToken.Null,
+                JsonToken.Name("d"), JsonToken.False,
+                JsonToken.Name("e"), JsonToken.True,
+                JsonToken.Name("f"), JsonToken.StartArray, JsonToken.Value(2), JsonToken.EndArray,
+                JsonToken.Name("g"), JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject,
+                JsonToken.EndObject
+            };
+            AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true, 
+                           'f': [2], 'g': {'x':'y' }}",
+                expected);
+        }
+
+        // Once the end-document token has been returned, a further call to Next()
+        // must throw InvalidOperationException.
+        [Test]
+        public void NextAfterEndDocumentThrows()
+        {
+            var reader = new StringReader("null");
+            var tokenizer = new JsonTokenizer(reader);
+
+            var valueToken = tokenizer.Next();
+            Assert.AreEqual(JsonToken.Null, valueToken);
+
+            var endToken = tokenizer.Next();
+            Assert.AreEqual(JsonToken.EndDocument, endToken);
+
+            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
+        }
+
+        // The end-document token can be pushed back and read again; after that second
+        // read, a further call to Next() must throw InvalidOperationException.
+        [Test]
+        public void CanPushBackEndDocument()
+        {
+            var reader = new StringReader("null");
+            var tokenizer = new JsonTokenizer(reader);
+
+            var valueToken = tokenizer.Next();
+            Assert.AreEqual(JsonToken.Null, valueToken);
+
+            var endToken = tokenizer.Next();
+            Assert.AreEqual(JsonToken.EndDocument, endToken);
+
+            tokenizer.PushBack(JsonToken.EndDocument);
+            var endTokenAgain = tokenizer.Next();
+            Assert.AreEqual(JsonToken.EndDocument, endTokenAgain);
+
+            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
+        }
+       
+        /// <summary>
+        /// Checks that the given JSON produces exactly the given tokens, in order.
+        /// Every apostrophe in <paramref name="json"/> is replaced by a double quote before
+        /// tokenizing, so tests that are not about apostrophe handling can write JSON without
+        /// escaping quotes in string literals. The trailing "end document" token is implicit
+        /// and must not be included in <paramref name="expectedTokens"/>.
+        /// </summary>
+        private static void AssertTokens(string json, params JsonToken[] expectedTokens)
+        {
+            string doubleQuotedJson = json.Replace('\'', '"');
+            AssertTokensNoReplacement(doubleQuotedJson, expectedTokens);
+        }
+
+        /// <summary>
+        /// Checks that the given JSON produces exactly the given tokens, in order.
+        /// In contrast to <see cref="AssertTokens(string, JsonToken[])"/>, the JSON is passed
+        /// to the tokenizer untouched, so use this when apostrophes in the text are meant to
+        /// remain apostrophes. The trailing "end document" token is implicit and must not be
+        /// included in <paramref name="expectedTokens"/>.
+        /// </summary>
+        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
+        {
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            foreach (var expectedToken in expectedTokens)
+            {
+                var actualToken = tokenizer.Next();
+                // Report a premature end of input with a dedicated message.
+                if (actualToken == JsonToken.EndDocument)
+                {
+                    Assert.Fail("Expected {0} but reached end of token stream", expectedToken);
+                }
+                Assert.AreEqual(expectedToken, actualToken);
+            }
+
+            // After the expected tokens, only the end of the document may remain.
+            var finalToken = tokenizer.Next();
+            if (finalToken == JsonToken.EndDocument)
+            {
+                return;
+            }
+            Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
+        }
+
+        /// <summary>
+        /// Checks that the given JSON first produces the given tokens, in order, and that the
+        /// following call to the tokenizer throws <c>InvalidProtocolBufferException</c>.
+        /// No character replacement is performed on <paramref name="json"/>.
+        /// </summary>
+        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
+        {
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            foreach (var expectedToken in expectedTokens)
+            {
+                var actualToken = tokenizer.Next();
+                // Report a premature end of input with a dedicated message.
+                if (actualToken == JsonToken.EndDocument)
+                {
+                    Assert.Fail("Expected {0} but reached end of document", expectedToken);
+                }
+                Assert.AreEqual(expectedToken, actualToken);
+            }
+            Assert.Throws<InvalidProtocolBufferException>(() => tokenizer.Next());
+        }
+    }
+}
author	Jon Skeet <jonskeet@google.com>	2015-09-04 12:41:14 +0100
committer	Jon Skeet <jonskeet@google.com>	2015-11-03 19:05:11 +0000
commit	fb2488225fbd239f7880e3b493cbfd2f19da755b (patch)
tree	e51173d483b91a77708dd3bdbffa1577bf89bbec /csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
parent	aa3675415ec8da317793a3e151e44daaebc21ff8 (diff)