aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc')
-rw-r--r--third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc844
1 files changed, 844 insertions, 0 deletions
diff --git a/third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc b/third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc
new file mode 100644
index 0000000000..047c14e150
--- /dev/null
+++ b/third_party/protobuf/3.4.0/src/google/protobuf/util/internal/json_stream_parser.cc
@@ -0,0 +1,844 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/util/internal/json_stream_parser.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
+
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/util/internal/object_writer.h>
+#include <google/protobuf/util/internal/json_escaping.h>
+#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/mathlimits.h>
+
+
+namespace google {
+namespace protobuf {
+namespace util {
+
+// Allow these symbols to be referenced as util::Status, util::error::* in
+// this file.
+using util::Status;
+namespace error {
+using util::error::CANCELLED;
+using util::error::INTERNAL;
+using util::error::INVALID_ARGUMENT;
+} // namespace error
+
+namespace converter {
+
+// Total number of characters in one escaped UTF-16 code unit: the backslash,
+// the 'u', and the four hex digits ('\\' 'u' X X X X).
+static const int kUnicodeEscapedLength = 6;
+
+// Character counts of the true, false, and null literals. sizeof() on a
+// string literal includes the terminating NUL, hence the "- 1".
+static const int true_len = sizeof("true") - 1;
+static const int false_len = sizeof("false") - 1;
+static const int null_len = sizeof("null") - 1;
+
+// Returns true if |c| is an ASCII letter, an underscore, or a dollar sign.
+// These are the characters accepted as the first character of a bare
+// (unquoted) object key.
+inline bool IsLetter(char c) {
+  if (c == '_' || c == '$') return true;
+  const bool lower = c >= 'a' && c <= 'z';
+  const bool upper = c >= 'A' && c <= 'Z';
+  return lower || upper;
+}
+
+// Returns true if |c| is an ASCII digit or satisfies IsLetter(). These are
+// the characters accepted after the first character of a bare object key.
+inline bool IsAlphanumeric(char c) {
+  const bool digit = c >= '0' && c <= '9';
+  return digit || IsLetter(c);
+}
+
+// Splits a bare key off the front of |input|.
+//
+// On success, |key| is set to the longest prefix that starts with an
+// IsLetter() character and continues with IsAlphanumeric() characters,
+// |input| is advanced past that prefix, and true is returned. If |input| is
+// empty or does not start with an IsLetter() character, false is returned and
+// neither argument is modified. |key| aliases the bytes of |input|; nothing
+// is copied.
+static bool ConsumeKey(StringPiece* input, StringPiece* key) {
+  if (input->empty()) return false;
+  if (!IsLetter((*input)[0])) return false;
+
+  // The first character is already known to be valid; extend the key for as
+  // long as the following characters qualify.
+  int len = 1;
+  while (len < input->size() && IsAlphanumeric((*input)[len])) {
+    ++len;
+  }
+
+  const char* start = input->data();
+  *key = StringPiece(start, len);
+  *input = StringPiece(start + len, input->size() - len);
+  return true;
+}
+
+// Returns true if |input| is non-empty and its first character could begin a
+// bare key, i.e. satisfies IsLetter(). Does not consume anything.
+static bool MatchKey(StringPiece input) {
+  if (input.empty()) return false;
+  return IsLetter(input[0]);
+}
+
+JsonStreamParser::JsonStreamParser(ObjectWriter* ow)  // Parse events are reported to |ow|.
+ : ow_(ow),
+ stack_(),
+ leftover_(),
+ json_(),
+ p_(),
+ key_(),
+ key_storage_(),
+ finishing_(false),
+ parsed_(),
+ parsed_storage_(),
+ string_open_(0),  // 0 means no string literal is currently open.
+ chunk_storage_(),
+ coerce_to_utf8_(false),  // This and the two options below start disabled.
+ allow_empty_null_(false),
+ loose_float_number_conversion_(false) {
+ // Seed the parse stack with VALUE: the parser starts out expecting one value.
+ stack_.push(VALUE);
+}
+
+JsonStreamParser::~JsonStreamParser() {}  // Empty body: no explicit cleanup is performed here.
+
+
+// Feeds the next fragment of JSON text to the parser.
+//
+// Bytes held back by an earlier call (leftover_) are prepended to |json|.
+// Only the structurally valid UTF-8 prefix of the combined data is handed to
+// ParseChunk(); the remaining tail is stashed in leftover_ until more data
+// arrives. Returns the status produced by ParseChunk(), or OK when there was
+// no valid prefix to parse yet.
+util::Status JsonStreamParser::Parse(StringPiece json) {
+  StringPiece chunk = json;
+  if (!leftover_.empty()) {
+    // Move the leftovers into chunk_storage_ rather than pointing chunk at
+    // leftover_ itself, because ParseChunk() rewrites leftover_. The combined
+    // buffer could be large; this relies on the chunks being small.
+    chunk_storage_.swap(leftover_);
+    StrAppend(&chunk_storage_, json);
+    chunk = StringPiece(chunk_storage_);
+  }
+
+  const int valid_len = internal::UTF8SpnStructurallyValid(chunk);
+  if (valid_len <= 0) {
+    // Nothing parseable yet: keep everything for the next call.
+    leftover_.assign(chunk.data(), chunk.size());
+    return util::Status();
+  }
+
+  util::Status status = ParseChunk(chunk.substr(0, valid_len));
+
+  // Whatever follows the valid prefix is kept for later, when more data is
+  // available to complete it.
+  StrAppend(&leftover_, chunk.substr(valid_len));
+  return status;
+}
+
+// Signals that no more input will arrive and parses whatever is still held in
+// leftover_. In this mode conditions that would otherwise wait for more data
+// (unterminated strings, unknown tokens) are reported as errors. Also fails
+// if non-whitespace input remains after the parser has finished.
+util::Status JsonStreamParser::FinishParse() {
+  // Nothing expected and nothing buffered: we are already done.
+  const bool nothing_pending = stack_.empty() && leftover_.empty();
+  if (nothing_pending) return util::Status();
+
+  // Storage for the UTF8-coerced copy. Declared at function scope because p_
+  // and json_ may point into it while RunParser() executes.
+  google::protobuf::scoped_array<char> utf8;
+  if (!coerce_to_utf8_) {
+    // Point p_/json_ at the data first so ReportFailure() has context.
+    p_ = json_ = leftover_;
+    if (!internal::IsStructurallyValidUTF8(leftover_)) {
+      return ReportFailure("Encountered non UTF-8 code points.");
+    }
+  } else {
+    // ' ' is passed as the replacement character (presumably substituted for
+    // invalid bytes; see UTF8CoerceToStructurallyValid).
+    utf8.reset(new char[leftover_.size()]);
+    char* coerced = internal::UTF8CoerceToStructurallyValid(
+        leftover_, utf8.get(), ' ');
+    p_ = json_ = StringPiece(coerced, leftover_.size());
+  }
+
+  finishing_ = true;
+  util::Status result = RunParser();
+  if (!result.ok()) return result;
+
+  // Trailing whitespace is fine; anything else is unparsed garbage.
+  SkipWhitespace();
+  if (!p_.empty()) {
+    result = ReportFailure("Parsing terminated before end of input.");
+  }
+  return result;
+}
+
+// Runs the parser over |chunk| in non-finishing mode.
+//
+// After a successful run, any input that could not be consumed yet is copied
+// into leftover_ for the next call. If the parser expects nothing more (empty
+// stack) but non-whitespace input remains, an error is returned.
+util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
+  // An empty chunk requires no work.
+  if (chunk.empty()) return util::Status();
+
+  json_ = chunk;
+  p_ = chunk;
+  finishing_ = false;
+
+  util::Status result = RunParser();
+  if (!result.ok()) return result;
+
+  SkipWhitespace();
+  if (p_.empty()) {
+    // Everything was consumed, so nothing needs to be carried over.
+    leftover_.clear();
+    return util::Status();
+  }
+  if (stack_.empty()) {
+    // Input remains but the parser is not expecting anything further.
+    return ReportFailure("Parsing terminated before end of input.");
+  }
+  // More data is expected; keep the unparsed tail for the next round.
+  leftover_ = p_.ToString();
+  return util::Status();
+}
+
+// Main parse loop: repeatedly pops the next expected ParseType off stack_ and
+// dispatches to the matching Parse* routine until the stack is empty or a
+// routine fails.
+//
+// A CANCELLED result while not finishing means "need more input": the popped
+// ParseType is pushed back so the step is retried on the next chunk, and OK
+// is returned. Any other failure is returned to the caller unchanged.
+util::Status JsonStreamParser::RunParser() {
+  while (!stack_.empty()) {
+    ParseType type = stack_.top();
+    // While a string literal is still open (it was cut off by the end of the
+    // previous chunk) the next token is by definition more string.
+    TokenType t;
+    if (string_open_ == 0) {
+      t = GetNextTokenType();
+    } else {
+      t = BEGIN_STRING;
+    }
+    stack_.pop();
+
+    util::Status result;
+    switch (type) {
+      case VALUE:
+        result = ParseValue(t);
+        break;
+      case OBJ_MID:
+        result = ParseObjectMid(t);
+        break;
+      case ENTRY:
+        result = ParseEntry(t);
+        break;
+      case ENTRY_MID:
+        result = ParseEntryMid(t);
+        break;
+      case ARRAY_VALUE:
+        result = ParseArrayValue(t);
+        break;
+      case ARRAY_MID:
+        result = ParseArrayMid(t);
+        break;
+      default:
+        result = util::Status(util::error::INTERNAL,
+                              StrCat("Unknown parse type: ", type));
+        break;
+    }
+    if (result.ok()) continue;
+
+    const bool retry_later =
+        !finishing_ && result == util::Status(error::CANCELLED, "");
+    if (!retry_later) return result;
+
+    // Save our state so this step can be attempted again later.
+    stack_.push(type);
+    // A key that still has to be rendered may point into the current input;
+    // copy it into our own storage so it remains usable.
+    if (!key_.empty() && key_storage_.empty()) {
+      StrAppend(&key_storage_, key_);
+      key_ = StringPiece(key_storage_);
+    }
+    return util::Status();
+  }
+  return util::Status();
+}
+
+// Parses one JSON value whose first token has type |type| by delegating to
+// the handler for that token. Token types that cannot start a value either
+// produce an "empty null" (when enabled and allowed in this position), a
+// CANCELLED status (when the input may merely be truncated), or an error.
+util::Status JsonStreamParser::ParseValue(TokenType type) {
+  switch (type) {
+    case BEGIN_OBJECT:
+      return HandleBeginObject();
+    case BEGIN_ARRAY:
+      return HandleBeginArray();
+    case BEGIN_STRING:
+      return ParseString();
+    case BEGIN_NUMBER:
+      return ParseNumber();
+    case BEGIN_TRUE:
+      return ParseTrue();
+    case BEGIN_FALSE:
+      return ParseFalse();
+    case BEGIN_NULL:
+      return ParseNull();
+    case UNKNOWN:
+      return ReportUnknown("Expected a value.");
+    default:
+      break;
+  }
+
+  if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
+    return ParseEmptyNull();
+  }
+
+  // We may have been cut off mid-literal: with e.g. 'fals' at the end of the
+  // input we cannot know whether the next character completes the literal or
+  // makes it invalid, so wait for more data.
+  if (!finishing_ && p_.length() < false_len) {
+    return util::Status(error::CANCELLED, "");
+  }
+  return ReportFailure("Unexpected token.");
+}
+
+// Parses a string value and, on success, renders it to the ObjectWriter under
+// the pending key, then resets the key and the parsed-string state. A
+// non-OK status from ParseStringHelper() is returned unchanged.
+util::Status JsonStreamParser::ParseString() {
+  util::Status result = ParseStringHelper();
+  if (!result.ok()) return result;
+
+  ow_->RenderString(key_, parsed_);
+  // The value has been emitted; drop everything that referred to it.
+  parsed_ = StringPiece();
+  parsed_storage_.clear();
+  key_ = StringPiece();
+  return result;
+}
+
+// Scans a quoted string starting at p_ and leaves the unescaped contents in
+// parsed_.
+//
+// If the string contains no escapes and is fully contained in the current
+// input, parsed_ points straight into the input buffer; otherwise the
+// contents are accumulated in parsed_storage_. When the input ends before the
+// closing quote, CANCELLED is returned (unless finishing) with string_open_
+// still set, so a later call resumes in the middle of the string.
+util::Status JsonStreamParser::ParseStringHelper() {
+  // First time through for this string: consume and remember the opening
+  // quote so we know which character closes it.
+  if (string_open_ == 0) {
+    string_open_ = *p_.data();
+    GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\'');
+    Advance();
+  }
+
+  // Start of the run of plain characters not yet copied to parsed_storage_.
+  // Copying is deferred so an escape-free string needs no copy at all.
+  const char* pending = p_.data();
+  while (!p_.empty()) {
+    const char* cur = p_.data();
+
+    if (*cur == '\\') {
+      // Flush the plain characters that precede the escape.
+      if (pending < cur) {
+        parsed_storage_.append(pending, cur - pending);
+      }
+      // A lone backslash at the end of the input: wait for more data, or
+      // fail if none is coming.
+      if (p_.length() == 1) {
+        return finishing_
+                   ? ReportFailure("Closing quote expected in string.")
+                   : util::Status(error::CANCELLED, "");
+      }
+
+      const char escaped = cur[1];
+      if (escaped == 'u') {
+        // \uXXXX: the helper appends the decoded character and advances p_.
+        util::Status result = ParseUnicodeEscape();
+        if (!result.ok()) {
+          return result;
+        }
+      } else {
+        // Standard backslash escapes; any other escaped character stands for
+        // itself.
+        char literal = escaped;
+        switch (escaped) {
+          case 'b':
+            literal = '\b';
+            break;
+          case 'f':
+            literal = '\f';
+            break;
+          case 'n':
+            literal = '\n';
+            break;
+          case 'r':
+            literal = '\r';
+            break;
+          case 't':
+            literal = '\t';
+            break;
+          case 'v':
+            literal = '\v';
+            break;
+          default:
+            break;
+        }
+        parsed_storage_.push_back(literal);
+        // Skip the backslash and the escaped character.
+        p_.remove_prefix(2);
+      }
+      pending = p_.data();
+      continue;
+    }
+
+    if (*cur != string_open_) {
+      // Ordinary character: it stays part of the pending run.
+      Advance();
+      continue;
+    }
+
+    // Closing quote found.
+    if (parsed_storage_.empty()) {
+      // Nothing was copied, so the result can alias the input buffer.
+      parsed_ = StringPiece(pending, cur - pending);
+    } else {
+      if (pending < cur) {
+        parsed_storage_.append(pending, cur - pending);
+      }
+      parsed_ = StringPiece(parsed_storage_);
+    }
+    // Clear the quote character so the next string starts fresh.
+    string_open_ = 0;
+    Advance();
+    return util::Status();
+  }
+
+  // Out of input: preserve what has been scanned so far.
+  if (pending < p_.data()) {
+    parsed_storage_.append(pending, p_.data() - pending);
+  }
+  // More data may still arrive and bring the closing quote.
+  if (!finishing_) {
+    return util::Status(error::CANCELLED, "");
+  }
+  // End of input reached without a closing quote.
+  string_open_ = 0;
+  return ReportFailure("Closing quote expected in string.");
+}
+
+// Decodes the four hex digits of one \uXXXX escape, starting at |digits|
+// (which must have at least kUnicodeEscapedLength - 2 readable characters),
+// into |*value|. Returns false without touching |*value| if any character is
+// not a hex digit.
+static bool ParseFourHexDigits(const char* digits, uint32* value) {
+  uint32 code = 0;
+  for (int i = 0; i < kUnicodeEscapedLength - 2; ++i) {
+    // The <cctype> classifiers have undefined behaviour for negative
+    // arguments other than EOF, and plain char is signed on many platforms,
+    // so bytes >= 0x80 must be converted to unsigned char first.
+    if (!isxdigit(static_cast<unsigned char>(digits[i]))) {
+      return false;
+    }
+    code = (code << 4) + hex_digit_to_int(digits[i]);
+  }
+  *value = code;
+  return true;
+}
+
+// Decodes the \uhhhh escape at the start of p_ (p_ must begin with '\\' 'u'),
+// appends the UTF-8 encoding of the resulting code point to parsed_storage_,
+// and advances p_ past the escape.
+//
+// A high surrogate followed by a \uhhhh low surrogate is combined into one
+// supplementary code point and both escapes are consumed. Returns CANCELLED
+// when not finishing and the input is too short to decide; returns an error
+// for non-hex digits, and (unless coerce_to_utf8_ is set) for unpaired
+// surrogates or invalid code points.
+//
+// There are some security exploits with UTF-8 that we should be careful of:
+// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
+util::Status JsonStreamParser::ParseUnicodeEscape() {
+  if (p_.length() < kUnicodeEscapedLength) {
+    if (!finishing_) {
+      return util::Status(error::CANCELLED, "");
+    }
+    return ReportFailure("Illegal hex string.");
+  }
+  GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
+  GOOGLE_DCHECK_EQ('u', p_.data()[1]);
+  uint32 code = 0;
+  if (!ParseFourHexDigits(p_.data() + 2, &code)) {
+    return ReportFailure("Invalid escape sequence.");
+  }
+  if (code >= JsonEscaping::kMinHighSurrogate &&
+      code <= JsonEscaping::kMaxHighSurrogate) {
+    if (p_.length() < 2 * kUnicodeEscapedLength) {
+      // Not enough input for a second escape. While streaming, wait for more;
+      // when finishing, a lone high surrogate is only tolerated if coercing.
+      if (!finishing_) {
+        return util::Status(error::CANCELLED, "");
+      }
+      if (!coerce_to_utf8_) {
+        return ReportFailure("Missing low surrogate.");
+      }
+    } else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
+               p_.data()[kUnicodeEscapedLength + 1] == 'u') {
+      uint32 low_code = 0;
+      if (!ParseFourHexDigits(p_.data() + kUnicodeEscapedLength + 2,
+                              &low_code)) {
+        return ReportFailure("Invalid escape sequence.");
+      }
+      if (low_code >= JsonEscaping::kMinLowSurrogate &&
+          low_code <= JsonEscaping::kMaxLowSurrogate) {
+        // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
+        code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
+               JsonEscaping::kMinSupplementaryCodePoint;
+        // Advance past the first code unit escape.
+        p_.remove_prefix(kUnicodeEscapedLength);
+      } else if (!coerce_to_utf8_) {
+        return ReportFailure("Invalid low surrogate.");
+      }
+    } else if (!coerce_to_utf8_) {
+      return ReportFailure("Missing low surrogate.");
+    }
+  }
+  if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
+    return ReportFailure("Invalid unicode code point.");
+  }
+  char buf[UTFmax];
+  int len = EncodeAsUTF8Char(code, buf);
+  // Advance past the [final] code unit escape.
+  p_.remove_prefix(kUnicodeEscapedLength);
+  parsed_storage_.append(buf, len);
+  return util::Status();
+}
+
+// Parses a number and renders it to the ObjectWriter under the pending key as
+// a double, int64 or uint64, depending on what ParseNumberHelper() produced.
+// The key is reset once the value has been rendered. A non-OK status from the
+// helper is returned unchanged.
+util::Status JsonStreamParser::ParseNumber() {
+  NumberResult number;
+  util::Status result = ParseNumberHelper(&number);
+  if (!result.ok()) return result;
+
+  switch (number.type) {
+    case NumberResult::DOUBLE:
+      ow_->RenderDouble(key_, number.double_val);
+      break;
+
+    case NumberResult::INT:
+      ow_->RenderInt64(key_, number.int_val);
+      break;
+
+    case NumberResult::UINT:
+      ow_->RenderUint64(key_, number.uint_val);
+      break;
+
+    default:
+      return ReportFailure("Unable to parse number.");
+  }
+  // Only reached after one of the Render* calls above.
+  key_ = StringPiece();
+  return result;
+}
+
+// Parses the number at the start of p_ into |result| and advances p_ past it.
+//
+// The number is classified as DOUBLE if it contains '.', 'e' or 'E', as INT
+// if it starts with '-', and as UINT otherwise. Returns CANCELLED when not
+// finishing and the number runs to the very end of the input (more digits
+// might follow), and an error when the text cannot be converted, is written
+// with a leading zero (octal/hex style), or -- unless
+// loose_float_number_conversion_ is set -- is a non-finite double. On any
+// non-OK return p_ is left unchanged.
+util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
+  const char* data = p_.data();
+  int length = p_.length();
+
+  // Look for the first non-numeric character, or the end of the string.
+  int index = 0;
+  bool floating = false;
+  bool negative = data[index] == '-';
+  // Find the first character that cannot be part of the number. Along the way
+  // detect if the number needs to be parsed as a double.
+  // Note that this restricts numbers to the JSON specification, so for example
+  // we do not support hex or octal notations.
+  for (; index < length; ++index) {
+    char c = data[index];
+    // isdigit() has undefined behaviour for negative arguments other than
+    // EOF, and plain char is signed on many platforms, so bytes >= 0x80 must
+    // be converted to unsigned char first.
+    if (isdigit(static_cast<unsigned char>(c))) continue;
+    if (c == '.' || c == 'e' || c == 'E') {
+      floating = true;
+      continue;
+    }
+    // 'x' is swallowed here so that hex-style input reaches the conversion
+    // and leading-zero checks below instead of being split into two tokens.
+    if (c == '+' || c == '-' || c == 'x') continue;
+    // Not a valid number character, break out.
+    break;
+  }
+
+  // If the entire input is a valid number, and we may have more content in the
+  // future, we abort for now and resume when we know more.
+  if (index == length && !finishing_) {
+    return util::Status(error::CANCELLED, "");
+  }
+
+  // Create a string containing just the number, so we can use safe_strtoX
+  string number = p_.substr(0, index).ToString();
+
+  // Floating point number, parse as a double.
+  if (floating) {
+    if (!safe_strtod(number, &result->double_val)) {
+      return ReportFailure("Unable to parse number.");
+    }
+    if (!loose_float_number_conversion_ &&
+        !MathLimits<double>::IsFinite(result->double_val)) {
+      return ReportFailure("Number exceeds the range of double.");
+    }
+    result->type = NumberResult::DOUBLE;
+    p_.remove_prefix(index);
+    return util::Status();
+  }
+
+  // Positive non-floating point number, parse as a uint64.
+  if (!negative) {
+    // Octal/Hex numbers are not valid JSON values.
+    if (number.length() >= 2 && number[0] == '0') {
+      return ReportFailure("Octal/hex numbers are not valid JSON values.");
+    }
+    if (!safe_strtou64(number, &result->uint_val)) {
+      return ReportFailure("Unable to parse number.");
+    }
+    result->type = NumberResult::UINT;
+    p_.remove_prefix(index);
+    return util::Status();
+  }
+
+  // Octal/Hex numbers are not valid JSON values. number[0] is the '-' sign
+  // here, so the leading digit is number[1].
+  if (number.length() >= 3 && number[1] == '0') {
+    return ReportFailure("Octal/hex numbers are not valid JSON values.");
+  }
+  // Negative non-floating point number, parse as an int64.
+  if (!safe_strto64(number, &result->int_val)) {
+    return ReportFailure("Unable to parse number.");
+  }
+  result->type = NumberResult::INT;
+  p_.remove_prefix(index);
+  return util::Status();
+}
+
+// Consumes the opening '{', reports the start of an object (under the pending
+// key) to the ObjectWriter, and schedules parsing of the first entry.
+util::Status JsonStreamParser::HandleBeginObject() {
+  GOOGLE_DCHECK_EQ('{', *p_.data());
+  Advance();
+  ow_->StartObject(key_);
+  // Next we expect either a key or the closing brace.
+  stack_.push(ENTRY);
+  key_ = StringPiece();
+  return util::Status();
+}
+
+// Handles the token that follows a key:value pair inside an object: '}' ends
+// the object, ',' leads to another entry, and anything else is an error (or
+// a request for more data if the token type is still unknown).
+util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
+  switch (type) {
+    case UNKNOWN:
+      return ReportUnknown("Expected , or } after key:value pair.");
+
+    case END_OBJECT:
+      // Object is complete: advance past the closing brace and render the
+      // EndObject.
+      Advance();
+      ow_->EndObject();
+      return util::Status();
+
+    case VALUE_SEPARATOR:
+      // Found a comma: advance past it and get ready for an entry.
+      Advance();
+      stack_.push(ENTRY);
+      return util::Status();
+
+    default:
+      // Illegal token after key:value pair.
+      return ReportFailure("Expected , or } after key:value pair.");
+  }
+}
+
+// Parses the start of an object entry: either the closing '}' (which also
+// permits a trailing comma) or a key, which may be a quoted string or a bare
+// identifier. After a key, ENTRY_MID (':') and then OBJ_MID (',' or '}') are
+// scheduled.
+util::Status JsonStreamParser::ParseEntry(TokenType type) {
+  if (type == UNKNOWN) {
+    return ReportUnknown("Expected an object key or }.");
+  }
+  if (type == END_OBJECT) {
+    // Close the object and return.
+    ow_->EndObject();
+    Advance();
+    return util::Status();
+  }
+
+  util::Status result;
+  switch (type) {
+    case BEGIN_STRING:
+      // Standard JSON: the key is a string. Parse it and take it over.
+      result = ParseStringHelper();
+      if (result.ok()) {
+        key_storage_.clear();
+        if (parsed_storage_.empty()) {
+          // The parsed string aliases the input buffer; so can the key.
+          key_ = parsed_;
+        } else {
+          // The string was unescaped into parsed_storage_; steal that buffer.
+          parsed_storage_.swap(key_storage_);
+          key_ = StringPiece(key_storage_);
+        }
+        parsed_ = StringPiece();
+      }
+      break;
+
+    case BEGIN_KEY:
+      // Back compat: the key is a bare identifier.
+      result = ParseKey();
+      break;
+
+    default:
+      // Unknown key type, report an error.
+      result = ReportFailure("Expected an object key or }.");
+      break;
+  }
+  if (!result.ok()) return result;
+
+  // Pushed in reverse: ENTRY_MID (':') is handled first, then OBJ_MID.
+  stack_.push(OBJ_MID);
+  stack_.push(ENTRY_MID);
+  return result;
+}
+
+// Handles the token between a key and its value. A ':' is consumed and a
+// VALUE is scheduled; any other known token is an error.
+util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
+  switch (type) {
+    case UNKNOWN:
+      return ReportUnknown("Expected : between key:value pair.");
+
+    case ENTRY_SEPARATOR:
+      Advance();
+      stack_.push(VALUE);
+      return util::Status();
+
+    default:
+      return ReportFailure("Expected : between key:value pair.");
+  }
+}
+
+// Consumes the opening '[', reports the start of a list (under the pending
+// key) to the ObjectWriter, and schedules parsing of the first array value.
+util::Status JsonStreamParser::HandleBeginArray() {
+  GOOGLE_DCHECK_EQ('[', *p_.data());
+  Advance();
+  ow_->StartList(key_);
+  // Next we expect either a value or the closing bracket.
+  stack_.push(ARRAY_VALUE);
+  key_ = StringPiece();
+  return util::Status();
+}
+
+// Parses one position inside an array: either the closing ']' or a value.
+util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
+  switch (type) {
+    case UNKNOWN:
+      return ReportUnknown("Expected a value or ] within an array.");
+    case END_ARRAY:
+      ow_->EndList();
+      Advance();
+      return util::Status();
+    default:
+      break;
+  }
+
+  // ParseValue() may push further entries, and ARRAY_MID has to sit beneath
+  // them, so it is pushed first. The handling of empty-null array values
+  // also relies on ARRAY_MID being on top of the stack at this point.
+  stack_.push(ARRAY_MID);
+  util::Status result = ParseValue(type);
+  const bool cancelled = (result == util::Status(error::CANCELLED, ""));
+  if (cancelled) {
+    // This step will be retried from scratch, so take ARRAY_MID off again
+    // rather than pushing it a second time on the retry.
+    stack_.pop();
+  }
+  return result;
+}
+
+// Handles the token that follows an array value: ']' ends the list, ','
+// leads to another value, and anything else is an error (or a request for
+// more data if the token type is still unknown).
+util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
+  switch (type) {
+    case UNKNOWN:
+      return ReportUnknown("Expected , or ] after array value.");
+
+    case END_ARRAY:
+      ow_->EndList();
+      Advance();
+      return util::Status();
+
+    case VALUE_SEPARATOR:
+      // Found a comma: advance past it and expect an array value next.
+      Advance();
+      stack_.push(ARRAY_VALUE);
+      return util::Status();
+
+    default:
+      // Illegal token after array value.
+      return ReportFailure("Expected , or ] after array value.");
+  }
+}
+
+// Renders boolean true under the pending key, then consumes the "true"
+// literal and resets the key. The caller has already matched the literal.
+util::Status JsonStreamParser::ParseTrue() {
+  ow_->RenderBool(key_, true);
+  p_.remove_prefix(true_len);
+  key_ = StringPiece();
+  return util::Status();
+}
+
+// Renders boolean false under the pending key, then consumes the "false"
+// literal and resets the key. The caller has already matched the literal.
+util::Status JsonStreamParser::ParseFalse() {
+  ow_->RenderBool(key_, false);
+  p_.remove_prefix(false_len);
+  key_ = StringPiece();
+  return util::Status();
+}
+
+// Renders a null under the pending key, then consumes the "null" literal and
+// resets the key. The caller has already matched the literal.
+util::Status JsonStreamParser::ParseNull() {
+  ow_->RenderNull(key_);
+  p_.remove_prefix(null_len);
+  key_ = StringPiece();
+  return util::Status();
+}
+
+util::Status JsonStreamParser::ParseEmptyNull() {  // Renders a null without consuming any input.
+ ow_->RenderNull(key_);
+ key_ = StringPiece();  // The pending key has been used; reset it.
+ return util::Status();
+}
+
+// Decides whether a missing value may be treated as null at this point,
+// based on what the parser expects next (the top of stack_): always when an
+// OBJ_MID is next, and when an ARRAY_MID is next only if the current token
+// is a ','. Never allowed when the stack is empty.
+bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
+  if (stack_.empty()) return false;
+  const ParseType next = stack_.top();
+  if (next == OBJ_MID) return true;
+  return next == ARRAY_MID && type == VALUE_SEPARATOR;
+}
+
+util::Status JsonStreamParser::ReportFailure(StringPiece message) {
+ static const int kContextLength = 20;
+ const char* p_start = p_.data();
+ const char* json_start = json_.data();
+ const char* begin = std::max(p_start - kContextLength, json_start);
+ const char* end =
+ std::min(p_start + kContextLength, json_start + json_.size());
+ StringPiece segment(begin, end - begin);
+ string location(p_start - begin, ' ');
+ location.push_back('^');
+ return util::Status(util::error::INVALID_ARGUMENT,
+ StrCat(message, "\n", segment, "\n", location));
+}
+
+// Handles a token whose type could not be determined. While more input may
+// still arrive this returns CANCELLED so parsing is retried later; when
+// finishing it becomes a hard failure, with a prefix noting an unexpected
+// end of input if nothing is left to read.
+util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
+  if (!finishing_) {
+    return util::Status(error::CANCELLED, "");
+  }
+  if (!p_.empty()) {
+    return ReportFailure(message);
+  }
+  return ReportFailure(StrCat("Unexpected end of string. ", message));
+}
+
+// Advances p_ past any leading ASCII whitespace.
+void JsonStreamParser::SkipWhitespace() {
+  for (;;) {
+    if (p_.empty()) return;
+    if (!ascii_isspace(*p_.data())) return;
+    Advance();
+  }
+}
+
+// Moves p_ forward by one UTF-8 character, using the byte count reported by
+// UTF8FirstLetterNumBytes() but never stepping beyond the end of p_.
+void JsonStreamParser::Advance() {
+  const int char_bytes = UTF8FirstLetterNumBytes(p_.data(), p_.length());
+  const int available = p_.length();
+  p_.remove_prefix(std::min<int>(available, char_bytes));
+}
+
+// Parses a bare (unquoted) object key at p_ into key_, which then points
+// directly into the input. Returns CANCELLED, with p_ restored, when not
+// finishing and the key runs to the very end of the input.
+util::Status JsonStreamParser::ParseKey() {
+  const StringPiece original = p_;
+  if (!ConsumeKey(&p_, &key_)) {
+    return ReportFailure("Invalid key or variable name.");
+  }
+  // If the key consumed everything and more data may follow, we cannot know
+  // whether the key is complete; rewind and try again later.
+  const bool maybe_truncated = !finishing_ && p_.empty();
+  if (maybe_truncated) {
+    p_ = original;
+    return util::Status(error::CANCELLED, "");
+  }
+  // key_ refers to the input, not to key_storage_, so clear the storage.
+  key_storage_.clear();
+  return util::Status();
+}
+
+// Skips leading whitespace and classifies the token that starts at p_ without
+// consuming it. Returns UNKNOWN when the input is exhausted or the token
+// cannot be classified from the data seen so far.
+JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
+  SkipWhitespace();
+
+  int size = p_.size();
+  if (size == 0) {
+    // Out of data. The caller puts the previous parse type back on the stack
+    // and tries again once more data is available.
+    return UNKNOWN;
+  }
+  // TODO(sven): Split this method based on context since different contexts
+  // support different tokens. Would slightly speed up processing?
+  const char* data = p_.data();
+
+  // Tokens that are fully identified by their first character.
+  switch (*data) {
+    case '\"':
+    case '\'':
+      return BEGIN_STRING;
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      return BEGIN_NUMBER;
+    case '{':
+      return BEGIN_OBJECT;
+    case '}':
+      return END_OBJECT;
+    case '[':
+      return BEGIN_ARRAY;
+    case ']':
+      return END_ARRAY;
+    case ':':
+      return ENTRY_SEPARATOR;
+    case ',':
+      return VALUE_SEPARATOR;
+    default:
+      break;
+  }
+
+  // Keyword literals take precedence over bare keys.
+  if (size >= true_len && !strncmp(data, "true", true_len)) {
+    return BEGIN_TRUE;
+  }
+  if (size >= false_len && !strncmp(data, "false", false_len)) {
+    return BEGIN_FALSE;
+  }
+  if (size >= null_len && !strncmp(data, "null", null_len)) {
+    return BEGIN_NULL;
+  }
+  if (MatchKey(p_)) {
+    return BEGIN_KEY;
+  }
+
+  // This is not necessarily an invalid token; we just cannot parse what we
+  // have so far. Return UNKNOWN instead of an error so the caller can retry
+  // with more data, or report the problem when finishing with leftovers.
+  return UNKNOWN;
+}
+
+} // namespace converter
+} // namespace util
+} // namespace protobuf
+} // namespace google