diff options
Diffstat (limited to 'src/google/protobuf/util/internal/json_stream_parser.cc')
-rw-r--r-- | src/google/protobuf/util/internal/json_stream_parser.cc | 239 |
1 files changed, 163 insertions, 76 deletions
diff --git a/src/google/protobuf/util/internal/json_stream_parser.cc b/src/google/protobuf/util/internal/json_stream_parser.cc index df916751..1c63b31d 100644 --- a/src/google/protobuf/util/internal/json_stream_parser.cc +++ b/src/google/protobuf/util/internal/json_stream_parser.cc @@ -36,14 +36,14 @@ #include <cstdlib> #include <cstring> #include <memory> -#ifndef _SHARED_PTR_H -#include <google/protobuf/stubs/shared_ptr.h> -#endif #include <google/protobuf/stubs/logging.h> #include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/strutil.h> #include <google/protobuf/util/internal/object_writer.h> +#include <google/protobuf/util/internal/json_escaping.h> +#include <google/protobuf/stubs/strutil.h> +#include <google/protobuf/stubs/mathlimits.h> + namespace google { namespace protobuf { @@ -53,13 +53,14 @@ namespace util { // this file. using util::Status; namespace error { +using util::error::CANCELLED; using util::error::INTERNAL; using util::error::INVALID_ARGUMENT; } // namespace error namespace converter { -// Number of digits in a unicode escape sequence (/uXXXX) +// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X) static const int kUnicodeEscapedLength = 6; // Length of the true, false, and null literals. @@ -106,7 +107,9 @@ JsonStreamParser::JsonStreamParser(ObjectWriter* ow) parsed_storage_(), string_open_(0), chunk_storage_(), - coerce_to_utf8_(false) { + coerce_to_utf8_(false), + allow_empty_null_(false), + loose_float_number_conversion_(false) { // Initialize the stack with a single value to be parsed. stack_.push(VALUE); } @@ -124,7 +127,7 @@ util::Status JsonStreamParser::Parse(StringPiece json) { // Don't point chunk to leftover_ because leftover_ will be updated in // ParseChunk(chunk). chunk_storage_.swap(leftover_); - json.AppendToString(&chunk_storage_); + StrAppend(&chunk_storage_, json); chunk = StringPiece(chunk_storage_); } @@ -135,11 +138,11 @@ util::Status JsonStreamParser::Parse(StringPiece json) { // Any leftover characters are stashed in leftover_ for later parsing when // there is more data available. - chunk.substr(n).AppendToString(&leftover_); + StrAppend(&leftover_, chunk.substr(n)); return status; } else { - chunk.CopyToString(&leftover_); - return util::Status::OK; + leftover_.assign(chunk.data(), chunk.size()); + return util::Status(); } } @@ -147,11 +150,11 @@ util::Status JsonStreamParser::FinishParse() { // If we do not expect anything and there is nothing left to parse we're all // done. if (stack_.empty() && leftover_.empty()) { - return util::Status::OK; + return util::Status(); } // Storage for UTF8-coerced string. - google::protobuf::scoped_array<char> utf8; + std::unique_ptr<char[]> utf8; if (coerce_to_utf8_) { utf8.reset(new char[leftover_.size()]); char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' '); @@ -178,7 +181,7 @@ util::Status JsonStreamParser::FinishParse() { util::Status JsonStreamParser::ParseChunk(StringPiece chunk) { // Do not do any work if the chunk is empty. - if (chunk.empty()) return util::Status::OK; + if (chunk.empty()) return util::Status(); p_ = json_ = chunk; @@ -200,7 +203,7 @@ util::Status JsonStreamParser::ParseChunk(StringPiece chunk) { // unparsed data left, we save it for later parse. leftover_ = p_.ToString(); } - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::RunParser() { @@ -241,20 +244,20 @@ util::Status JsonStreamParser::RunParser() { } if (!result.ok()) { // If we were cancelled, save our state and try again later. - if (!finishing_ && result == util::Status::CANCELLED) { + if (!finishing_ && result == util::Status(error::CANCELLED, "")) { stack_.push(type); // If we have a key we still need to render, make sure to save off the // contents in our own storage. if (!key_.empty() && key_storage_.empty()) { - key_.AppendToString(&key_storage_); + StrAppend(&key_storage_, key_); key_ = StringPiece(key_storage_); } - result = util::Status::OK; + result = util::Status(); } return result; } } - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseValue(TokenType type) { @@ -276,12 +279,16 @@ util::Status JsonStreamParser::ParseValue(TokenType type) { case UNKNOWN: return ReportUnknown("Expected a value."); default: { + if (allow_empty_null_ && IsEmptyNullAllowed(type)) { + return ParseEmptyNull(); + } + // Special case for having been cut off while parsing, wait for more data. // This handles things like 'fals' being at the end of the string, we // don't know if the next char would be e, completing it, or something // else, making it invalid. if (!finishing_ && p_.length() < false_len) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } return ReportFailure("Unexpected token."); } @@ -292,8 +299,8 @@ util::Status JsonStreamParser::ParseString() { util::Status result = ParseStringHelper(); if (result.ok()) { ow_->RenderString(key_, parsed_); - key_.clear(); - parsed_.clear(); + key_ = StringPiece(); + parsed_ = StringPiece(); parsed_storage_.clear(); } return result; @@ -314,13 +321,12 @@ util::Status JsonStreamParser::ParseStringHelper() { // We're about to handle an escape, copy all bytes from last to data. if (last < data) { parsed_storage_.append(last, data - last); - last = data; } // If we ran out of string after the \, cancel or report an error // depending on if we expect more data later. if (p_.length() == 1) { if (!finishing_) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } return ReportFailure("Closing quote expected in string."); } @@ -370,7 +376,6 @@ util::Status JsonStreamParser::ParseStringHelper() { } else { if (last < data) { parsed_storage_.append(last, data - last); - last = data; } parsed_ = StringPiece(parsed_storage_); } @@ -378,7 +383,7 @@ util::Status JsonStreamParser::ParseStringHelper() { // start fresh. string_open_ = 0; Advance(); - return util::Status::OK; + return util::Status(); } // Normal character, just advance past it. Advance(); @@ -389,7 +394,7 @@ util::Status JsonStreamParser::ParseStringHelper() { } // If we didn't find the closing quote but we expect more data, cancel for now if (!finishing_) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } // End of string reached without a closing quote, report an error. string_open_ = 0; @@ -406,7 +411,7 @@ util::Status JsonStreamParser::ParseStringHelper() { util::Status JsonStreamParser::ParseUnicodeEscape() { if (p_.length() < kUnicodeEscapedLength) { if (!finishing_) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } return ReportFailure("Illegal hex string."); } @@ -419,12 +424,48 @@ util::Status JsonStreamParser::ParseUnicodeEscape() { } code = (code << 4) + hex_digit_to_int(p_.data()[i]); } + if (code >= JsonEscaping::kMinHighSurrogate && + code <= JsonEscaping::kMaxHighSurrogate) { + if (p_.length() < 2 * kUnicodeEscapedLength) { + if (!finishing_) { + return util::Status(error::CANCELLED, ""); + } + if (!coerce_to_utf8_) { + return ReportFailure("Missing low surrogate."); + } + } else if (p_.data()[kUnicodeEscapedLength] == '\\' && + p_.data()[kUnicodeEscapedLength + 1] == 'u') { + uint32 low_code = 0; + for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength; + ++i) { + if (!isxdigit(p_.data()[i])) { + return ReportFailure("Invalid escape sequence."); + } + low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]); + } + if (low_code >= JsonEscaping::kMinLowSurrogate && + low_code <= JsonEscaping::kMaxLowSurrogate) { + // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint. + code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) + + JsonEscaping::kMinSupplementaryCodePoint; + // Advance past the first code unit escape. + p_.remove_prefix(kUnicodeEscapedLength); + } else if (!coerce_to_utf8_) { + return ReportFailure("Invalid low surrogate."); + } + } else if (!coerce_to_utf8_) { + return ReportFailure("Missing low surrogate."); + } + } + if (!coerce_to_utf8_ && !IsValidCodePoint(code)) { + return ReportFailure("Invalid unicode code point."); + } char buf[UTFmax]; int len = EncodeAsUTF8Char(code, buf); - // Advance past the unicode escape. + // Advance past the [final] code unit escape. p_.remove_prefix(kUnicodeEscapedLength); parsed_storage_.append(buf, len); - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseNumber() { @@ -434,17 +475,17 @@ util::Status JsonStreamParser::ParseNumber() { switch (number.type) { case NumberResult::DOUBLE: ow_->RenderDouble(key_, number.double_val); - key_.clear(); + key_ = StringPiece(); break; case NumberResult::INT: ow_->RenderInt64(key_, number.int_val); - key_.clear(); + key_ = StringPiece(); break; case NumberResult::UINT: ow_->RenderUint64(key_, number.uint_val); - key_.clear(); + key_ = StringPiece(); break; default: @@ -454,6 +495,19 @@ util::Status JsonStreamParser::ParseNumber() { return result; } +util::Status JsonStreamParser::ParseDoubleHelper( + const string& number, NumberResult* result) { + if (!safe_strtod(number, &result->double_val)) { + return ReportFailure("Unable to parse number."); + } + if (!loose_float_number_conversion_ && + !MathLimits<double>::IsFinite(result->double_val)) { + return ReportFailure("Number exceeds the range of double."); + } + result->type = NumberResult::DOUBLE; + return util::Status(); +} + util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { const char* data = p_.data(); int length = p_.length(); @@ -473,7 +527,7 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { floating = true; continue; } - if (c == '+' || c == '-') continue; + if (c == '+' || c == '-' || c == 'x') continue; // Not a valid number character, break out. break; } @@ -481,7 +535,7 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { // If the entire input is a valid number, and we may have more content in the // future, we abort for now and resume when we know more. if (index == length && !finishing_) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } // Create a string containing just the number, so we can use safe_strtoX @@ -489,40 +543,59 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { // Floating point number, parse as a double. if (floating) { - if (!safe_strtod(number, &result->double_val)) { - return ReportFailure("Unable to parse number."); + util::Status status = ParseDoubleHelper(number, result); + if (status.ok()) { + p_.remove_prefix(index); } - result->type = NumberResult::DOUBLE; - p_.remove_prefix(index); - return util::Status::OK; + return status; } // Positive non-floating point number, parse as a uint64. if (!negative) { - if (!safe_strtou64(number, &result->uint_val)) { - return ReportFailure("Unable to parse number."); + // Octal/Hex numbers are not valid JSON values. + if (number.length() >= 2 && number[0] == '0') { + return ReportFailure("Octal/hex numbers are not valid JSON values."); + } + if (safe_strtou64(number, &result->uint_val)) { + result->type = NumberResult::UINT; + p_.remove_prefix(index); + return util::Status(); + } else { + // If the value is too large, parse it as double. + util::Status status = ParseDoubleHelper(number, result); + if (status.ok()) { + p_.remove_prefix(index); + } + return status; } - result->type = NumberResult::UINT; - p_.remove_prefix(index); - return util::Status::OK; } + // Octal/Hex numbers are not valid JSON values. + if (number.length() >= 3 && number[1] == '0') { + return ReportFailure("Octal/hex numbers are not valid JSON values."); + } // Negative non-floating point number, parse as an int64. - if (!safe_strto64(number, &result->int_val)) { - return ReportFailure("Unable to parse number."); + if (safe_strto64(number, &result->int_val)) { + result->type = NumberResult::INT; + p_.remove_prefix(index); + return util::Status(); + } else { + // If the value is too large, parse it as double. + util::Status status = ParseDoubleHelper(number, result); + if (status.ok()) { + p_.remove_prefix(index); + } + return status; } - result->type = NumberResult::INT; - p_.remove_prefix(index); - return util::Status::OK; } util::Status JsonStreamParser::HandleBeginObject() { GOOGLE_DCHECK_EQ('{', *p_.data()); Advance(); ow_->StartObject(key_); - key_.clear(); + key_ = StringPiece(); stack_.push(ENTRY); - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseObjectMid(TokenType type) { @@ -534,13 +607,13 @@ util::Status JsonStreamParser::ParseObjectMid(TokenType type) { if (type == END_OBJECT) { Advance(); ow_->EndObject(); - return util::Status::OK; + return util::Status(); } // Found a comma, advance past it and get ready for an entry. if (type == VALUE_SEPARATOR) { Advance(); stack_.push(ENTRY); - return util::Status::OK; + return util::Status(); } // Illegal token after key:value pair. return ReportFailure("Expected , or } after key:value pair."); @@ -555,7 +628,7 @@ util::Status JsonStreamParser::ParseEntry(TokenType type) { if (type == END_OBJECT) { ow_->EndObject(); Advance(); - return util::Status::OK; + return util::Status(); } util::Status result; @@ -570,7 +643,7 @@ util::Status JsonStreamParser::ParseEntry(TokenType type) { } else { key_ = parsed_; } - parsed_.clear(); + parsed_ = StringPiece(); } } else if (type == BEGIN_KEY) { // Key is a bare key (back compat), create a StringPiece pointing to it. @@ -594,7 +667,7 @@ util::Status JsonStreamParser::ParseEntryMid(TokenType type) { if (type == ENTRY_SEPARATOR) { Advance(); stack_.push(VALUE); - return util::Status::OK; + return util::Status(); } return ReportFailure("Expected : between key:value pair."); } @@ -603,9 +676,9 @@ util::Status JsonStreamParser::HandleBeginArray() { GOOGLE_DCHECK_EQ('[', *p_.data()); Advance(); ow_->StartList(key_); - key_.clear(); + key_ = StringPiece(); stack_.push(ARRAY_VALUE); - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseArrayValue(TokenType type) { @@ -616,14 +689,15 @@ util::Status JsonStreamParser::ParseArrayValue(TokenType type) { if (type == END_ARRAY) { ow_->EndList(); Advance(); - return util::Status::OK; + return util::Status(); } // The ParseValue call may push something onto the stack so we need to make - // sure an ARRAY_MID is after it, so we push it on now. + // sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of + // empty-null array value is relying on this ARRAY_MID token. stack_.push(ARRAY_MID); util::Status result = ParseValue(type); - if (result == util::Status::CANCELLED) { + if (result == util::Status(error::CANCELLED, "")) { // If we were cancelled, pop back off the ARRAY_MID so we don't try to // push it on again when we try over. stack_.pop(); @@ -639,14 +713,14 @@ util::Status JsonStreamParser::ParseArrayMid(TokenType type) { if (type == END_ARRAY) { ow_->EndList(); Advance(); - return util::Status::OK; + return util::Status(); } // Found a comma, advance past it and expect an array value next. if (type == VALUE_SEPARATOR) { Advance(); stack_.push(ARRAY_VALUE); - return util::Status::OK; + return util::Status(); } // Illegal token after array value. return ReportFailure("Expected , or ] after array value."); @@ -654,31 +728,44 @@ util::Status JsonStreamParser::ParseArrayMid(TokenType type) { util::Status JsonStreamParser::ParseTrue() { ow_->RenderBool(key_, true); - key_.clear(); + key_ = StringPiece(); p_.remove_prefix(true_len); - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseFalse() { ow_->RenderBool(key_, false); - key_.clear(); + key_ = StringPiece(); p_.remove_prefix(false_len); - return util::Status::OK; + return util::Status(); } util::Status JsonStreamParser::ParseNull() { ow_->RenderNull(key_); - key_.clear(); + key_ = StringPiece(); p_.remove_prefix(null_len); - return util::Status::OK; + return util::Status(); +} + +util::Status JsonStreamParser::ParseEmptyNull() { + ow_->RenderNull(key_); + key_ = StringPiece(); + return util::Status(); +} + +bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) { + if (stack_.empty()) return false; + return (stack_.top() == ARRAY_MID && type == VALUE_SEPARATOR) || + stack_.top() == OBJ_MID; } util::Status JsonStreamParser::ReportFailure(StringPiece message) { static const int kContextLength = 20; const char* p_start = p_.data(); const char* json_start = json_.data(); - const char* begin = max(p_start - kContextLength, json_start); - const char* end = min(p_start + kContextLength, json_start + json_.size()); + const char* begin = std::max(p_start - kContextLength, json_start); + const char* end = + std::min(p_start + kContextLength, json_start + json_.size()); StringPiece segment(begin, end - begin); string location(p_start - begin, ' '); location.push_back('^'); @@ -689,7 +776,7 @@ util::Status JsonStreamParser::ReportFailure(StringPiece message) { util::Status JsonStreamParser::ReportUnknown(StringPiece message) { // If we aren't finishing the parse, cancel parsing and try later. if (!finishing_) { - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } if (p_.empty()) { return ReportFailure(StrCat("Unexpected end of string. ", message)); @@ -706,8 +793,8 @@ void JsonStreamParser::SkipWhitespace() { void JsonStreamParser::Advance() { // Advance by moving one UTF8 character while making sure we don't go beyond // the length of StringPiece. - p_.remove_prefix( - min<int>(p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length()))); + p_.remove_prefix(std::min<int>( + p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length()))); } util::Status JsonStreamParser::ParseKey() { @@ -719,11 +806,11 @@ util::Status JsonStreamParser::ParseKey() { // we can't know if the key was complete or not. if (!finishing_ && p_.empty()) { p_ = original; - return util::Status::CANCELLED; + return util::Status(error::CANCELLED, ""); } // Since we aren't using the key storage, clear it out. key_storage_.clear(); - return util::Status::OK; + return util::Status(); } JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() { |