From 3b3c8abb9635eb3ea078a821a99c9ef29d66dff7 Mon Sep 17 00:00:00 2001
From: Jisi Liu <jisi.liu@gmail.com>
Date: Wed, 30 Mar 2016 11:39:59 -0700
Subject: Integrate google internal changes.

---
 .../protobuf/util/internal/json_stream_parser.cc   | 62 +++++++++++++++++++---
 1 file changed, 54 insertions(+), 8 deletions(-)

(limited to 'src/google/protobuf/util/internal/json_stream_parser.cc')

diff --git a/src/google/protobuf/util/internal/json_stream_parser.cc b/src/google/protobuf/util/internal/json_stream_parser.cc
index df916751..39be7b03 100644
--- a/src/google/protobuf/util/internal/json_stream_parser.cc
+++ b/src/google/protobuf/util/internal/json_stream_parser.cc
@@ -42,8 +42,9 @@
 
 #include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/util/internal/object_writer.h>
+#include <google/protobuf/util/internal/json_escaping.h>
+#include <google/protobuf/stubs/strutil.h>
 
 namespace google {
 namespace protobuf {
@@ -59,7 +60,7 @@ using util::error::INVALID_ARGUMENT;
 
 namespace converter {
 
-// Number of digits in a unicode escape sequence (/uXXXX)
+// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X)
 static const int kUnicodeEscapedLength = 6;
 
 // Length of the true, false, and null literals.
@@ -419,9 +420,45 @@ util::Status JsonStreamParser::ParseUnicodeEscape() {
     }
     code = (code << 4) + hex_digit_to_int(p_.data()[i]);
   }
+  if (code >= JsonEscaping::kMinHighSurrogate &&
+      code <= JsonEscaping::kMaxHighSurrogate) {
+    if (p_.length() < 2 * kUnicodeEscapedLength) {
+      if (!finishing_) {
+        return util::Status::CANCELLED;
+      }
+      if (!coerce_to_utf8_) {
+        return ReportFailure("Missing low surrogate.");
+      }
+    } else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
+               p_.data()[kUnicodeEscapedLength + 1] == 'u') {
+      uint32 low_code = 0;
+      for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength;
+           ++i) {
+        if (!isxdigit(p_.data()[i])) {
+          return ReportFailure("Invalid escape sequence.");
+        }
+        low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]);
+      }
+      if (low_code >= JsonEscaping::kMinLowSurrogate &&
+          low_code <= JsonEscaping::kMaxLowSurrogate) {
+        // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
+        code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
+               JsonEscaping::kMinSupplementaryCodePoint;
+        // Advance past the first code unit escape.
+        p_.remove_prefix(kUnicodeEscapedLength);
+      } else if (!coerce_to_utf8_) {
+        return ReportFailure("Invalid low surrogate.");
+      }
+    } else if (!coerce_to_utf8_) {
+      return ReportFailure("Missing low surrogate.");
+    }
+  }
+  if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
+    return ReportFailure("Invalid unicode code point.");
+  }
   char buf[UTFmax];
   int len = EncodeAsUTF8Char(code, buf);
-  // Advance past the unicode escape.
+  // Advance past the [final] code unit escape.
   p_.remove_prefix(kUnicodeEscapedLength);
   parsed_storage_.append(buf, len);
   return util::Status::OK;
@@ -473,7 +510,7 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
       floating = true;
       continue;
     }
-    if (c == '+' || c == '-') continue;
+    if (c == '+' || c == '-' || c == 'x') continue;
     // Not a valid number character, break out.
     break;
   }
@@ -499,6 +536,10 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
 
   // Positive non-floating point number, parse as a uint64.
   if (!negative) {
+    // Octal/Hex numbers are not valid JSON values.
+    if (number.length() >= 2 && number[0] == '0') {
+      return ReportFailure("Octal/hex numbers are not valid JSON values.");
+    }
     if (!safe_strtou64(number, &result->uint_val)) {
       return ReportFailure("Unable to parse number.");
     }
@@ -507,6 +548,10 @@ util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
     return util::Status::OK;
   }
 
+  // Octal/Hex numbers are not valid JSON values.
+  if (number.length() >= 3 && number[1] == '0') {
+    return ReportFailure("Octal/hex numbers are not valid JSON values.");
+  }
   // Negative non-floating point number, parse as an int64.
   if (!safe_strto64(number, &result->int_val)) {
     return ReportFailure("Unable to parse number.");
@@ -677,8 +722,9 @@ util::Status JsonStreamParser::ReportFailure(StringPiece message) {
   static const int kContextLength = 20;
   const char* p_start = p_.data();
   const char* json_start = json_.data();
-  const char* begin = max(p_start - kContextLength, json_start);
-  const char* end = min(p_start + kContextLength, json_start + json_.size());
+  const char* begin = std::max(p_start - kContextLength, json_start);
+  const char* end =
+      std::min(p_start + kContextLength, json_start + json_.size());
   StringPiece segment(begin, end - begin);
   string location(p_start - begin, ' ');
   location.push_back('^');
@@ -706,8 +752,8 @@ void JsonStreamParser::SkipWhitespace() {
 void JsonStreamParser::Advance() {
   // Advance by moving one UTF8 character while making sure we don't go beyond
   // the length of StringPiece.
-  p_.remove_prefix(
-      min<int>(p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
+  p_.remove_prefix(std::min<int>(
+      p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
 }
 
 util::Status JsonStreamParser::ParseKey() {
-- 
cgit v1.2.3