aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/util/internal/json_stream_parser.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/util/internal/json_stream_parser.cc')
-rw-r--r--src/google/protobuf/util/internal/json_stream_parser.cc239
1 files changed, 163 insertions, 76 deletions
diff --git a/src/google/protobuf/util/internal/json_stream_parser.cc b/src/google/protobuf/util/internal/json_stream_parser.cc
index df916751..1c63b31d 100644
--- a/src/google/protobuf/util/internal/json_stream_parser.cc
+++ b/src/google/protobuf/util/internal/json_stream_parser.cc
@@ -36,14 +36,14 @@
#include <cstdlib>
#include <cstring>
#include <memory>
-#ifndef _SHARED_PTR_H
-#include <google/protobuf/stubs/shared_ptr.h>
-#endif
#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/util/internal/object_writer.h>
+#include <google/protobuf/util/internal/json_escaping.h>
+#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/mathlimits.h>
+
namespace google {
namespace protobuf {
@@ -53,13 +53,14 @@ namespace util {
// this file.
using util::Status;
namespace error {
+using util::error::CANCELLED;
using util::error::INTERNAL;
using util::error::INVALID_ARGUMENT;
} // namespace error
namespace converter {
-// Number of digits in a unicode escape sequence (/uXXXX)
+// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X)
static const int kUnicodeEscapedLength = 6;
// Length of the true, false, and null literals.
@@ -106,7 +107,9 @@ JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
parsed_storage_(),
string_open_(0),
chunk_storage_(),
- coerce_to_utf8_(false) {
+ coerce_to_utf8_(false),
+ allow_empty_null_(false),
+ loose_float_number_conversion_(false) {
// Initialize the stack with a single value to be parsed.
stack_.push(VALUE);
}
@@ -124,7 +127,7 @@ util::Status JsonStreamParser::Parse(StringPiece json) {
// Don't point chunk to leftover_ because leftover_ will be updated in
// ParseChunk(chunk).
chunk_storage_.swap(leftover_);
- json.AppendToString(&chunk_storage_);
+ StrAppend(&chunk_storage_, json);
chunk = StringPiece(chunk_storage_);
}
@@ -135,11 +138,11 @@ util::Status JsonStreamParser::Parse(StringPiece json) {
// Any leftover characters are stashed in leftover_ for later parsing when
// there is more data available.
- chunk.substr(n).AppendToString(&leftover_);
+ StrAppend(&leftover_, chunk.substr(n));
return status;
} else {
- chunk.CopyToString(&leftover_);
- return util::Status::OK;
+ leftover_.assign(chunk.data(), chunk.size());
+ return util::Status();
}
}
@@ -147,11 +150,11 @@ util::Status JsonStreamParser::FinishParse() {
// If we do not expect anything and there is nothing left to parse we're all
// done.
if (stack_.empty() && leftover_.empty()) {
- return util::Status::OK;
+ return util::Status();
}
// Storage for UTF8-coerced string.
- google::protobuf::scoped_array<char> utf8;
+ std::unique_ptr<char[]> utf8;
if (coerce_to_utf8_) {
utf8.reset(new char[leftover_.size()]);
char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' ');
@@ -178,7 +181,7 @@ util::Status JsonStreamParser::FinishParse() {
util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
// Do not do any work if the chunk is empty.
- if (chunk.empty()) return util::Status::OK;
+ if (chunk.empty()) return util::Status();
p_ = json_ = chunk;
@@ -200,7 +203,7 @@ util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
// unparsed data left, we save it for later parse.
leftover_ = p_.ToString();
}
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::RunParser() {
@@ -241,20 +244,20 @@ util::Status JsonStreamParser::RunParser() {
}
if (!result.ok()) {
// If we were cancelled, save our state and try again later.
- if (!finishing_ && result == util::Status::CANCELLED) {
+ if (!finishing_ && result == util::Status(error::CANCELLED, "")) {
stack_.push(type);
// If we have a key we still need to render, make sure to save off the
// contents in our own storage.
if (!key_.empty() && key_storage_.empty()) {
- key_.AppendToString(&key_storage_);
+ StrAppend(&key_storage_, key_);
key_ = StringPiece(key_storage_);
}
- result = util::Status::OK;
+ result = util::Status();
}
return result;
}
}
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseValue(TokenType type) {
@@ -276,12 +279,16 @@ util::Status JsonStreamParser::ParseValue(TokenType type) {
case UNKNOWN:
return ReportUnknown("Expected a value.");
default: {
+ if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
+ return ParseEmptyNull();
+ }
+
// Special case for having been cut off while parsing, wait for more data.
// This handles things like 'fals' being at the end of the string, we
// don't know if the next char would be e, completing it, or something
// else, making it invalid.
if (!finishing_ && p_.length() < false_len) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
return ReportFailure("Unexpected token.");
}
@@ -292,8 +299,8 @@ util::Status JsonStreamParser::ParseString() {
util::Status result = ParseStringHelper();
if (result.ok()) {
ow_->RenderString(key_, parsed_);
- key_.clear();
- parsed_.clear();
+ key_ = StringPiece();
+ parsed_ = StringPiece();
parsed_storage_.clear();
}
return result;
@@ -314,13 +321,12 @@ util::Status JsonStreamParser::ParseStringHelper() {
// We're about to handle an escape, copy all bytes from last to data.
if (last < data) {
parsed_storage_.append(last, data - last);
- last = data;
}
// If we ran out of string after the \, cancel or report an error
// depending on if we expect more data later.
if (p_.length() == 1) {
if (!finishing_) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
return ReportFailure("Closing quote expected in string.");
}
@@ -370,7 +376,6 @@ util::Status JsonStreamParser::ParseStringHelper() {
} else {
if (last < data) {
parsed_storage_.append(last, data - last);
- last = data;
}
parsed_ = StringPiece(parsed_storage_);
}
@@ -378,7 +383,7 @@ util::Status JsonStreamParser::ParseStringHelper() {
// start fresh.
string_open_ = 0;
Advance();
- return util::Status::OK;
+ return util::Status();
}
// Normal character, just advance past it.
Advance();
@@ -389,7 +394,7 @@ util::Status JsonStreamParser::ParseStringHelper() {
}
// If we didn't find the closing quote but we expect more data, cancel for now
if (!finishing_) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
// End of string reached without a closing quote, report an error.
string_open_ = 0;
@@ -406,7 +411,7 @@ util::Status JsonStreamParser::ParseStringHelper() {
util::Status JsonStreamParser::ParseUnicodeEscape() {
if (p_.length() < kUnicodeEscapedLength) {
if (!finishing_) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
return ReportFailure("Illegal hex string.");
}
@@ -419,12 +424,48 @@ util::Status JsonStreamParser::ParseUnicodeEscape() {
}
code = (code << 4) + hex_digit_to_int(p_.data()[i]);
}
+ if (code >= JsonEscaping::kMinHighSurrogate &&
+ code <= JsonEscaping::kMaxHighSurrogate) {
+ if (p_.length() < 2 * kUnicodeEscapedLength) {
+ if (!finishing_) {
+ return util::Status(error::CANCELLED, "");
+ }
+ if (!coerce_to_utf8_) {
+ return ReportFailure("Missing low surrogate.");
+ }
+ } else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
+ p_.data()[kUnicodeEscapedLength + 1] == 'u') {
+ uint32 low_code = 0;
+ for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength;
+ ++i) {
+ if (!isxdigit(p_.data()[i])) {
+ return ReportFailure("Invalid escape sequence.");
+ }
+ low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]);
+ }
+ if (low_code >= JsonEscaping::kMinLowSurrogate &&
+ low_code <= JsonEscaping::kMaxLowSurrogate) {
+ // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
+ code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
+ JsonEscaping::kMinSupplementaryCodePoint;
+ // Advance past the first code unit escape.
+ p_.remove_prefix(kUnicodeEscapedLength);
+ } else if (!coerce_to_utf8_) {
+ return ReportFailure("Invalid low surrogate.");
+ }
+ } else if (!coerce_to_utf8_) {
+ return ReportFailure("Missing low surrogate.");
+ }
+ }
+ if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
+ return ReportFailure("Invalid unicode code point.");
+ }
char buf[UTFmax];
int len = EncodeAsUTF8Char(code, buf);
- // Advance past the unicode escape.
+ // Advance past the [final] code unit escape.
p_.remove_prefix(kUnicodeEscapedLength);
parsed_storage_.append(buf, len);
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseNumber() {
@@ -434,17 +475,17 @@ util::Status JsonStreamParser::ParseNumber() {
switch (number.type) {
case NumberResult::DOUBLE:
ow_->RenderDouble(key_, number.double_val);
- key_.clear();
+ key_ = StringPiece();
break;
case NumberResult::INT:
ow_->RenderInt64(key_, number.int_val);
- key_.clear();
+ key_ = StringPiece();
break;
case NumberResult::UINT:
ow_->RenderUint64(key_, number.uint_val);
- key_.clear();
+ key_ = StringPiece();
break;
default:
@@ -454,6 +495,19 @@ util::Status JsonStreamParser::ParseNumber() {
return result;
}
+util::Status JsonStreamParser::ParseDoubleHelper(
+ const string& number, NumberResult* result) {
+ if (!safe_strtod(number, &result->double_val)) {
+ return ReportFailure("Unable to parse number.");
+ }
+ if (!loose_float_number_conversion_ &&
+ !MathLimits<double>::IsFinite(result->double_val)) {
+ return ReportFailure("Number exceeds the range of double.");
+ }
+ result->type = NumberResult::DOUBLE;
+ return util::Status();
+}
+
util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
const char* data = p_.data();
int length = p_.length();
@@ -473,7 +527,7 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
floating = true;
continue;
}
- if (c == '+' || c == '-') continue;
+ if (c == '+' || c == '-' || c == 'x') continue;
// Not a valid number character, break out.
break;
}
@@ -481,7 +535,7 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
// If the entire input is a valid number, and we may have more content in the
// future, we abort for now and resume when we know more.
if (index == length && !finishing_) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
// Create a string containing just the number, so we can use safe_strtoX
@@ -489,40 +543,59 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
// Floating point number, parse as a double.
if (floating) {
- if (!safe_strtod(number, &result->double_val)) {
- return ReportFailure("Unable to parse number.");
+ util::Status status = ParseDoubleHelper(number, result);
+ if (status.ok()) {
+ p_.remove_prefix(index);
}
- result->type = NumberResult::DOUBLE;
- p_.remove_prefix(index);
- return util::Status::OK;
+ return status;
}
// Positive non-floating point number, parse as a uint64.
if (!negative) {
- if (!safe_strtou64(number, &result->uint_val)) {
- return ReportFailure("Unable to parse number.");
+ // Octal/Hex numbers are not valid JSON values.
+ if (number.length() >= 2 && number[0] == '0') {
+ return ReportFailure("Octal/hex numbers are not valid JSON values.");
+ }
+ if (safe_strtou64(number, &result->uint_val)) {
+ result->type = NumberResult::UINT;
+ p_.remove_prefix(index);
+ return util::Status();
+ } else {
+ // If the value is too large, parse it as double.
+ util::Status status = ParseDoubleHelper(number, result);
+ if (status.ok()) {
+ p_.remove_prefix(index);
+ }
+ return status;
}
- result->type = NumberResult::UINT;
- p_.remove_prefix(index);
- return util::Status::OK;
}
+ // Octal/Hex numbers are not valid JSON values.
+ if (number.length() >= 3 && number[1] == '0') {
+ return ReportFailure("Octal/hex numbers are not valid JSON values.");
+ }
// Negative non-floating point number, parse as an int64.
- if (!safe_strto64(number, &result->int_val)) {
- return ReportFailure("Unable to parse number.");
+ if (safe_strto64(number, &result->int_val)) {
+ result->type = NumberResult::INT;
+ p_.remove_prefix(index);
+ return util::Status();
+ } else {
+ // If the value is too large, parse it as double.
+ util::Status status = ParseDoubleHelper(number, result);
+ if (status.ok()) {
+ p_.remove_prefix(index);
+ }
+ return status;
}
- result->type = NumberResult::INT;
- p_.remove_prefix(index);
- return util::Status::OK;
}
util::Status JsonStreamParser::HandleBeginObject() {
GOOGLE_DCHECK_EQ('{', *p_.data());
Advance();
ow_->StartObject(key_);
- key_.clear();
+ key_ = StringPiece();
stack_.push(ENTRY);
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
@@ -534,13 +607,13 @@ util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
if (type == END_OBJECT) {
Advance();
ow_->EndObject();
- return util::Status::OK;
+ return util::Status();
}
// Found a comma, advance past it and get ready for an entry.
if (type == VALUE_SEPARATOR) {
Advance();
stack_.push(ENTRY);
- return util::Status::OK;
+ return util::Status();
}
// Illegal token after key:value pair.
return ReportFailure("Expected , or } after key:value pair.");
@@ -555,7 +628,7 @@ util::Status JsonStreamParser::ParseEntry(TokenType type) {
if (type == END_OBJECT) {
ow_->EndObject();
Advance();
- return util::Status::OK;
+ return util::Status();
}
util::Status result;
@@ -570,7 +643,7 @@ util::Status JsonStreamParser::ParseEntry(TokenType type) {
} else {
key_ = parsed_;
}
- parsed_.clear();
+ parsed_ = StringPiece();
}
} else if (type == BEGIN_KEY) {
// Key is a bare key (back compat), create a StringPiece pointing to it.
@@ -594,7 +667,7 @@ util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
if (type == ENTRY_SEPARATOR) {
Advance();
stack_.push(VALUE);
- return util::Status::OK;
+ return util::Status();
}
return ReportFailure("Expected : between key:value pair.");
}
@@ -603,9 +676,9 @@ util::Status JsonStreamParser::HandleBeginArray() {
GOOGLE_DCHECK_EQ('[', *p_.data());
Advance();
ow_->StartList(key_);
- key_.clear();
+ key_ = StringPiece();
stack_.push(ARRAY_VALUE);
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
@@ -616,14 +689,15 @@ util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
if (type == END_ARRAY) {
ow_->EndList();
Advance();
- return util::Status::OK;
+ return util::Status();
}
// The ParseValue call may push something onto the stack so we need to make
- // sure an ARRAY_MID is after it, so we push it on now.
+ // sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of
+ // empty-null array value is relying on this ARRAY_MID token.
stack_.push(ARRAY_MID);
util::Status result = ParseValue(type);
- if (result == util::Status::CANCELLED) {
+ if (result == util::Status(error::CANCELLED, "")) {
// If we were cancelled, pop back off the ARRAY_MID so we don't try to
// push it on again when we try over.
stack_.pop();
@@ -639,14 +713,14 @@ util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
if (type == END_ARRAY) {
ow_->EndList();
Advance();
- return util::Status::OK;
+ return util::Status();
}
// Found a comma, advance past it and expect an array value next.
if (type == VALUE_SEPARATOR) {
Advance();
stack_.push(ARRAY_VALUE);
- return util::Status::OK;
+ return util::Status();
}
// Illegal token after array value.
return ReportFailure("Expected , or ] after array value.");
@@ -654,31 +728,44 @@ util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
util::Status JsonStreamParser::ParseTrue() {
ow_->RenderBool(key_, true);
- key_.clear();
+ key_ = StringPiece();
p_.remove_prefix(true_len);
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseFalse() {
ow_->RenderBool(key_, false);
- key_.clear();
+ key_ = StringPiece();
p_.remove_prefix(false_len);
- return util::Status::OK;
+ return util::Status();
}
util::Status JsonStreamParser::ParseNull() {
ow_->RenderNull(key_);
- key_.clear();
+ key_ = StringPiece();
p_.remove_prefix(null_len);
- return util::Status::OK;
+ return util::Status();
+}
+
+util::Status JsonStreamParser::ParseEmptyNull() {
+ ow_->RenderNull(key_);
+ key_ = StringPiece();
+ return util::Status();
+}
+
+bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
+ if (stack_.empty()) return false;
+ return (stack_.top() == ARRAY_MID && type == VALUE_SEPARATOR) ||
+ stack_.top() == OBJ_MID;
}
util::Status JsonStreamParser::ReportFailure(StringPiece message) {
static const int kContextLength = 20;
const char* p_start = p_.data();
const char* json_start = json_.data();
- const char* begin = max(p_start - kContextLength, json_start);
- const char* end = min(p_start + kContextLength, json_start + json_.size());
+ const char* begin = std::max(p_start - kContextLength, json_start);
+ const char* end =
+ std::min(p_start + kContextLength, json_start + json_.size());
StringPiece segment(begin, end - begin);
string location(p_start - begin, ' ');
location.push_back('^');
@@ -689,7 +776,7 @@ util::Status JsonStreamParser::ReportFailure(StringPiece message) {
util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
// If we aren't finishing the parse, cancel parsing and try later.
if (!finishing_) {
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
if (p_.empty()) {
return ReportFailure(StrCat("Unexpected end of string. ", message));
@@ -706,8 +793,8 @@ void JsonStreamParser::SkipWhitespace() {
void JsonStreamParser::Advance() {
// Advance by moving one UTF8 character while making sure we don't go beyond
// the length of StringPiece.
- p_.remove_prefix(
- min<int>(p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
+ p_.remove_prefix(std::min<int>(
+ p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
}
util::Status JsonStreamParser::ParseKey() {
@@ -719,11 +806,11 @@ util::Status JsonStreamParser::ParseKey() {
// we can't know if the key was complete or not.
if (!finishing_ && p_.empty()) {
p_ = original;
- return util::Status::CANCELLED;
+ return util::Status(error::CANCELLED, "");
}
// Since we aren't using the key storage, clear it out.
key_storage_.clear();
- return util::Status::OK;
+ return util::Status();
}
JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {