down integrate to svn

author: jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2014-07-18 00:47:59 +0000
committer: jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2014-07-18 00:47:59 +0000
commit: 4de8f55113007fdc8e34107950e605fc0209d465 (patch)
tree: 92b7da8757a7740d9e1f2d3ead233542947d8c8c /src/google/protobuf/io
parent: c5553a3d18f80132b9079c5504bc0aa1f7f950a0 (diff)
18 files changed, 674 insertions, 58 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index 36add8c3..f6a84533 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -83,6 +83,10 @@ CodedInputStream::~CodedInputStream() {
 int CodedInputStream::default_recursion_limit_ = 100;
 
 
+void CodedOutputStream::EnableAliasing(bool enabled) {
+  aliasing_enabled_ = enabled && output_->AllowsAliasing();
+}
+
 void CodedInputStream::BackUpInputToCurrentPosition() {
   int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
   if (backup_bytes > 0) {
@@ -167,6 +171,11 @@ void CodedInputStream::SetTotalBytesLimit(
   RecomputeBufferLimits();
 }
 
+int CodedInputStream::BytesUntilTotalBytesLimit() const {
+  if (total_bytes_limit_ == INT_MAX) return -1;
+  return total_bytes_limit_ - CurrentPosition();
+}
+
 void CodedInputStream::PrintTotalBytesLimitError() {
   GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
                 "big (more than " << total_bytes_limit_
@@ -247,6 +256,14 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
     buffer->clear();
   }
 
+  int closest_limit = min(current_limit_, total_bytes_limit_);
+  if (closest_limit != INT_MAX) {
+    int bytes_to_limit = closest_limit - CurrentPosition();
+    if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
+      buffer->reserve(size);
+    }
+  }
+
   int current_buffer_size;
   while ((current_buffer_size = BufferSize()) < size) {
     // Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
@@ -313,11 +330,16 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
   uint32 b;
   uint32 result;
 
-  b = *(ptr++); result  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |=  b         << 28; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result  = b      ; if (!(b & 0x80)) goto done;
+  result -= 0x80;
+  b = *(ptr++); result += b <<  7; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 7;
+  b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 14;
+  b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 21;
+  b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done;
+  // "result -= 0x80 << 28" is irrevelant.
 
   // If the input is larger than 32 bits, we still need to read it all
   // and discard the high-order bits.
@@ -347,8 +369,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) {
 
 bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
   if (BufferSize() >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
     const uint8* end = ReadVarint32FromArray(buffer_, value);
     if (end == NULL) return false;
@@ -391,8 +413,8 @@ uint32 CodedInputStream::ReadTagSlow() {
 uint32 CodedInputStream::ReadTagFallback() {
   const int buf_size = BufferSize();
   if (buf_size >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
       (buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
     uint32 tag;
     const uint8* end = ReadVarint32FromArray(buffer_, &tag);
@@ -444,8 +466,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
 
 bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
   if (BufferSize() >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
     // Fast path:  We have enough bytes left in the buffer to guarantee that
     // this read won't cross the end, so we can skip the checks.
@@ -457,16 +479,26 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
     // processors.
     uint32 part0 = 0, part1 = 0, part2 = 0;
 
-    b = *(ptr++); part0  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part2  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part2 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
+    b = *(ptr++); part0  = b      ; if (!(b & 0x80)) goto done;
+    part0 -= 0x80;
+    b = *(ptr++); part0 += b <<  7; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 7;
+    b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 14;
+    b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 21;
+    b = *(ptr++); part1  = b      ; if (!(b & 0x80)) goto done;
+    part1 -= 0x80;
+    b = *(ptr++); part1 += b <<  7; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 7;
+    b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 14;
+    b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 21;
+    b = *(ptr++); part2  = b      ; if (!(b & 0x80)) goto done;
+    part2 -= 0x80;
+    b = *(ptr++); part2 += b <<  7; if (!(b & 0x80)) goto done;
+    // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
 
     // We have overrun the maximum size of a varint (10 bytes).  The data
     // must be corrupt.
@@ -555,7 +587,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
     buffer_(NULL),
     buffer_size_(0),
     total_bytes_(0),
-    had_error_(false) {
+    had_error_(false),
+    aliasing_enabled_(false) {
   // Eagerly Refresh() so buffer space is immediately available.
   Refresh();
   // The Refresh() may have failed. If the client doesn't write any data,
@@ -609,6 +642,23 @@ uint8* CodedOutputStream::WriteRawToArray(
 }
 
 
+void CodedOutputStream::WriteAliasedRaw(const void* data, int size) {
+  if (size < buffer_size_
+      ) {
+    WriteRaw(data, size);
+  } else {
+    if (buffer_size_ > 0) {
+      output_->BackUp(buffer_size_);
+      total_bytes_ -= buffer_size_;
+      buffer_ = NULL;
+      buffer_size_ = 0;
+    }
+
+    total_bytes_ += size;
+    had_error_ |= !output_->WriteAliasedRaw(data, size);
+  }
+}
+
 void CodedOutputStream::WriteLittleEndian32(uint32 value) {
   uint8 bytes[sizeof(value)];
 
@@ -852,6 +902,13 @@ int CodedOutputStream::VarintSize64(uint64 value) {
   }
 }
 
+uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
+                                                     uint8* target) {
+  GOOGLE_DCHECK_LE(str.size(), kuint32max);
+  target = WriteVarint32ToArray(str.size(), target);
+  return WriteStringToArray(str, target);
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index 66cbee00..50a03a11 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -233,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
   // the last tag value, which can be checked with LastTagWas().
-  // Always inline because this is only called in once place per parse loop
+  // Always inline because this is only called in one place per parse loop
   // but it is called for every iteration of said loop, so it should be fast.
   // GCC doesn't want to inline this by default.
   uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
 
+  // This usually a faster alternative to ReadTag() when cutoff is a manifest
+  // constant.  It does particularly well for cutoff >= 127.  The first part
+  // of the return value is the tag that was read, though it can also be 0 in
+  // the cases where ReadTag() would return 0.  If the second part is true
+  // then the tag is known to be in [0, cutoff].  If not, the tag either is
+  // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
+  // because that can arise in several ways, and for best performance we want
+  // to avoid an extra "is tag == 0?" check here.)
+  inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
+      GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
   // Usually returns true if calling ReadVarint32() now would produce the given
   // value.  Will always return false if ReadVarint32() would not return the
   // given value.  If ExpectTag() returns true, it also advances past
@@ -264,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // zero, and ConsumedEntireMessage() will return true.
   bool ExpectAtEnd();
 
-  // If the last call to ReadTag() returned the given value, returns true.
-  // Otherwise, returns false;
+  // If the last call to ReadTag() or ReadTagWithCutoff() returned the
+  // given value, returns true.  Otherwise, returns false;
   //
   // This is needed because parsers for some types of embedded messages
   // (with field type TYPE_GROUP) don't actually know that they've reached the
@@ -333,7 +344,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
   // should set shorter limits if possible.  If warning_threshold is not -1,
   // a warning will be printed to stderr after warning_threshold bytes are
-  // read.  For backwards compatibility all negative values get squached to -1,
+  // read.  For backwards compatibility all negative values get squashed to -1,
   // as other negative values might have special internal meanings.
   // An error will always be printed to stderr if the limit is reached.
   //
@@ -356,6 +367,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //   something unusual.
   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
 
+  // The Total Bytes Limit minus the Current Position, or -1 if there
+  // is no Total Bytes Limit.
+  int BytesUntilTotalBytesLimit() const;
+
   // Recursion Limit -------------------------------------------------
   // To prevent corrupt or malicious messages from causing stack overflows,
   // we must keep track of the depth of recursion when parsing embedded
@@ -466,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   int overflow_bytes_;
 
   // LastTagWas() stuff.
-  uint32 last_tag_;         // result of last ReadTag().
+  uint32 last_tag_;         // result of last ReadTag() or ReadTagWithCutoff().
 
   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
@@ -638,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
 
   // Write raw bytes, copying them from the given buffer.
   void WriteRaw(const void* buffer, int size);
+  // Like WriteRaw()  but will try to write aliased data if aliasing is
+  // turned on.
+  void WriteRawMaybeAliased(const void* data, int size);
   // Like WriteRaw()  but writing directly to the target array.
   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
   // copy loops. Since this gets called by every field with string or bytes
@@ -649,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   void WriteString(const string& str);
   // Like WriteString()  but writing directly to the target array.
   static uint8* WriteStringToArray(const string& str, uint8* target);
+  // Write the varint-encoded size of str followed by str.
+  static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
 
 
+  // Instructs the CodedOutputStream to allow the underlying
+  // ZeroCopyOutputStream to hold pointers to the original structure instead of
+  // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
+  // underlying stream does not support aliasing, then enabling it has no
+  // affect.  For now, this only affects the behavior of
+  // WriteRawMaybeAliased().
+  //
+  // NOTE: It is caller's responsibility to ensure that the chunk of memory
+  // remains live until all of the data has been consumed from the stream.
+  void EnableAliasing(bool enabled);
+
   // Write a 32-bit little-endian integer.
   void WriteLittleEndian32(uint32 value);
   // Like WriteLittleEndian32()  but writing directly to the target array.
@@ -725,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   int buffer_size_;
   int total_bytes_;  // Sum of sizes of all buffers seen so far.
   bool had_error_;   // Whether an error occurred during output.
+  bool aliasing_enabled_;  // See EnableAliasing().
 
   // Advance the buffer by a given number of bytes.
   void Advance(int amount);
@@ -733,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // Advance(buffer_size_).
   bool Refresh();
 
+  // Like WriteRaw() but may avoid copying if the underlying
+  // ZeroCopyOutputStream supports it.
+  void WriteAliasedRaw(const void* buffer, int size);
+
   static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
 
   // Always-inlined versions of WriteVarint* functions so that code can be
@@ -850,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() {
   }
 }
 
+inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
+    uint32 cutoff) {
+  // In performance-sensitive code we can expect cutoff to be a compile-time
+  // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
+  // compile time.
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+    // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
+    // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
+    // is large enough then is it better to check for the two-byte case first?
+    if (static_cast<int8>(buffer_[0]) > 0) {
+      const uint32 kMax1ByteVarint = 0x7f;
+      uint32 tag = last_tag_ = buffer_[0];
+      Advance(1);
+      return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
+    }
+    // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
+    // and tag is two bytes.  The latter is tested by bitwise-and-not of the
+    // first byte and the second byte.
+    if (cutoff >= 0x80 &&
+        GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
+        GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
+      const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
+      uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
+      Advance(2);
+      // It might make sense to test for tag == 0 now, but it is so rare that
+      // that we don't bother.  A varint-encoded 0 should be one byte unless
+      // the encoder lost its mind.  The second part of the return value of
+      // this function is allowed to be either true or false if the tag is 0,
+      // so we don't have to check for tag == 0.  We may need to check whether
+      // it exceeds cutoff.
+      bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
+      return make_pair(tag, at_or_below_cutoff);
+    }
+  }
+  // Slow path
+  last_tag_ = ReadTagFallback();
+  return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
+}
+
 inline bool CodedInputStream::LastTagWas(uint32 expected) {
   return last_tag_ == expected;
 }
@@ -1029,6 +1104,15 @@ inline void CodedOutputStream::WriteString(const string& str) {
   WriteRaw(str.data(), static_cast<int>(str.size()));
 }
 
+inline void CodedOutputStream::WriteRawMaybeAliased(
+    const void* data, int size) {
+  if (aliasing_enabled_) {
+    WriteAliasedRaw(data, size);
+  } else {
+    WriteRaw(data, size);
+  }
+}
+
 inline uint8* CodedOutputStream::WriteStringToArray(
     const string& str, uint8* target) {
   return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
index 144f44f0..41dc10e3 100644
--- a/src/google/protobuf/io/coded_stream_inl.h
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -37,6 +37,7 @@
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
 
 #include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
 #include <string>
 #include <google/protobuf/stubs/stl_util.h>
 
@@ -53,7 +54,7 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer,
     // When buffer is empty, string_as_array(buffer) will return NULL but memcpy
     // requires non-NULL pointers even when size is 0. Hench this check.
     if (size > 0) {
-      memcpy(string_as_array(buffer), buffer_, size);
+      memcpy(mutable_string_data(buffer), buffer_, size);
       Advance(size);
     }
     return true;
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index 2daab194..b39987ca 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -144,6 +144,10 @@ uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize];
 // checks.
 const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
 
+// In several ReadCord test functions, we either clear the Cord before ReadCord
+// calls or not.
+const bool kResetCords[] = {false, true};
+
 // -------------------------------------------------------------------
 // Varint tests.
 
@@ -682,6 +686,191 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
   EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
 }
 
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes));
+    EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit());
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes),
+              coded_input.BytesUntilTotalBytesLimit());
+    EXPECT_EQ(kRawBytes, str);
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(kRawBytes, str);
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(kRawBytes, str);
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate more than strlen(kRawBytes)
+    // if the content of kRawBytes is appended to string in small
+    // chunks.
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, -1));
+    // Note: this check depends on string class implementation. It
+    // expects that string will always allocate the same amount of
+    // memory for an empty string.
+    EXPECT_EQ(string().capacity(), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+    EXPECT_GT(1 << 30, str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.SetTotalBytesLimit(16, 16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest,
+       ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+    coded_input.SetTotalBytesLimit(16, 16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest,
+       ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(16);
+    coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_));
+    EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit());
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
 
 // -------------------------------------------------------------------
 // Skip
@@ -980,9 +1169,11 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
   ArrayInputStream input(buffer_, sizeof(buffer_));
   CodedInputStream coded_input(&input);
   coded_input.SetTotalBytesLimit(16, -1);
+  EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
 
   string str;
   EXPECT_TRUE(coded_input.ReadString(&str, 16));
+  EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
 
   vector<string> errors;
 
@@ -997,7 +1188,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
     "A protocol message was rejected because it was too big", errors[0]);
 
   coded_input.SetTotalBytesLimit(32, -1);
+  EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
   EXPECT_TRUE(coded_input.ReadString(&str, 16));
+  EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
 }
 
 TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
index 365e9ea5..7ee24bc3 100644
--- a/src/google/protobuf/io/gzip_stream.h
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -118,7 +118,7 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
     ZLIB = 2,
   };
 
-  struct LIBPROTOBUF_EXPORT Options {
+  struct Options {
     // Defaults to GZIP.
     Format format;
 
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index c9b30359..76fb9442 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -231,7 +231,7 @@ TEST(Printer, Death) {
   EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
   EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
 }
-#endif  // PROTOBUF__HAS_DEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 
 TEST(Printer, WriteFailurePartial) {
   char buffer[17];
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
new file mode 100644
index 00000000..cf0c6e10
--- /dev/null
+++ b/src/google/protobuf/io/strtod.cc
@@ -0,0 +1,113 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/io/strtod.h>
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ----------------------------------------------------------------------
+// NoLocaleStrtod()
+//   This code will make you cry.
+// ----------------------------------------------------------------------
+
+namespace {
+
+// Returns a string identical to *input except that the character pointed to
+// by radix_pos (which should be '.') is replaced with the locale-specific
+// radix character.
+string LocalizeRadix(const char* input, const char* radix_pos) {
+  // Determine the locale-specific radix character by calling sprintf() to
+  // print the number 1.5, then stripping off the digits.  As far as I can
+  // tell, this is the only portable, thread-safe way to get the C library
+  // to divuldge the locale's radix character.  No, localeconv() is NOT
+  // thread-safe.
+  char temp[16];
+  int size = sprintf(temp, "%.1f", 1.5);
+  GOOGLE_CHECK_EQ(temp[0], '1');
+  GOOGLE_CHECK_EQ(temp[size-1], '5');
+  GOOGLE_CHECK_LE(size, 6);
+
+  // Now replace the '.' in the input with it.
+  string result;
+  result.reserve(strlen(input) + size - 3);
+  result.append(input, radix_pos);
+  result.append(temp + 1, size - 2);
+  result.append(radix_pos + 1);
+  return result;
+}
+
+}  // namespace
+
+double NoLocaleStrtod(const char* text, char** original_endptr) {
+  // We cannot simply set the locale to "C" temporarily with setlocale()
+  // as this is not thread-safe.  Instead, we try to parse in the current
+  // locale first.  If parsing stops at a '.' character, then this is a
+  // pretty good hint that we're actually in some other locale in which
+  // '.' is not the radix character.
+
+  char* temp_endptr;
+  double result = strtod(text, &temp_endptr);
+  if (original_endptr != NULL) *original_endptr = temp_endptr;
+  if (*temp_endptr != '.') return result;
+
+  // Parsing halted on a '.'.  Perhaps we're in a different locale?  Let's
+  // try to replace the '.' with a locale-specific radix character and
+  // try again.
+  string localized = LocalizeRadix(text, temp_endptr);
+  const char* localized_cstr = localized.c_str();
+  char* localized_endptr;
+  result = strtod(localized_cstr, &localized_endptr);
+  if ((localized_endptr - localized_cstr) >
+      (temp_endptr - text)) {
+    // This attempt got further, so replacing the decimal must have helped.
+    // Update original_endptr to point at the right location.
+    if (original_endptr != NULL) {
+      // size_diff is non-zero if the localized radix has multiple bytes.
+      int size_diff = localized.size() - strlen(text);
+      // const_cast is necessary to match the strtod() interface.
+      *original_endptr = const_cast<char*>(
+        text + (localized_endptr - localized_cstr - size_diff));
+    }
+  }
+
+  return result;
+}
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
new file mode 100644
index 00000000..2be3c43d
--- /dev/null
+++ b/src/google/protobuf/io/strtod.h
@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A locale-independent version of strtod(), used to parse floating
+// point default values in .proto files, where the decimal separator
+// is always a dot.
+
+#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__
+#define GOOGLE_PROTOBUF_IO_STRTOD_H__
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A locale-independent version of the standard strtod(), which always
+// uses a dot as the decimal separator.
+double NoLocaleStrtod(const char* str, char** endptr);
+
+}  // namespace io
+}  // namespace protobuf
+
+}  // namespace google
+#endif  // GOOGLE_PROTOBUF_IO_STRTOD_H__
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index a022b71d..d149305a 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -91,6 +91,7 @@
 #include <google/protobuf/io/tokenizer.h>
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/stubs/stl_util.h>
@@ -195,7 +196,9 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
     record_target_(NULL),
     record_start_(-1),
     allow_f_after_float_(false),
-    comment_style_(CPP_COMMENT_STYLE) {
+    comment_style_(CPP_COMMENT_STYLE),
+    require_space_after_number_(true),
+    allow_multiline_strings_(false) {
 
   current_.line = 0;
   current_.column = 0;
@@ -350,9 +353,16 @@ void Tokenizer::ConsumeString(char delimiter) {
   while (true) {
     switch (current_char_) {
       case '\0':
-      case '\n': {
-        AddError("String literals cannot cross line boundaries.");
+        AddError("Unexpected end of string.");
         return;
+
+      case '\n': {
+        if (!allow_multiline_strings_) {
+          AddError("String literals cannot cross line boundaries.");
+          return;
+        }
+        NextChar();
+        break;
       }
 
       case '\\': {
@@ -449,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
     }
   }
 
-  if (LookingAt<Letter>()) {
+  if (LookingAt<Letter>() && require_space_after_number_) {
     AddError("Need space between number and identifier.");
   } else if (current_char_ == '.') {
     if (is_float) {
@@ -618,6 +628,12 @@ bool Tokenizer::Next() {
         ConsumeString('\'');
         current_.type = TYPE_STRING;
       } else {
+        // Check if the high order bit is set.
+        if (current_char_ & 0x80) {
+          error_collector_->AddError(line_, column_,
+              StringPrintf("Interpreting non ascii codepoint %d.",
+                           static_cast<unsigned char>(current_char_)));
+        }
         NextChar();
         current_.type = TYPE_SYMBOL;
       }
@@ -1086,6 +1102,26 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
   }
 }
 
+template<typename CharacterClass>
+static bool AllInClass(const string& s) {
+  for (int i = 0; i < s.size(); ++i) {
+    if (!CharacterClass::InClass(s[i]))
+      return false;
+  }
+  return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {
+  // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+  if (text.size() == 0)
+    return false;
+  if (!Letter::InClass(text.at(0)))
+    return false;
+  if (!AllInClass<Alphanumeric>(text.substr(1)))
+    return false;
+  return true;
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 6b727d9f..2f07116e 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -229,6 +229,21 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   // Sets the comment style.
   void set_comment_style(CommentStyle style) { comment_style_ = style; }
 
+  // Whether to require whitespace between a number and a field name.
+  // Default is true. Do not use this; for Google-internal cleanup only.
+  void set_require_space_after_number(bool require) {
+    require_space_after_number_ = require;
+  }
+
+  // Whether to allow string literals to span multiple lines. Default is false.
+  // Do not use this; for Google-internal cleanup only.
+  void set_allow_multiline_strings(bool allow) {
+    allow_multiline_strings_ = allow;
+  }
+
+  // External helper: validate an identifier.
+  static bool IsIdentifier(const string& text);
+
   // -----------------------------------------------------------------
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
@@ -259,6 +274,8 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   // Options.
   bool allow_f_after_float_;
   CommentStyle comment_style_;
+  bool require_space_after_number_;
+  bool allow_multiline_strings_;
 
   // Since we count columns we need to interpret tabs somehow.  We'll take
   // the standard 8-character definition for lack of any way to do better.
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index dbb5be4f..b39279b7 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -411,12 +411,6 @@ MultiTokenCase kMultiTokenCases[] = {
     { Tokenizer::TYPE_END       , ""   , 1, 3, 3 },
   }},
 
-  // Bytes with the high-order bit set should not be seen as control characters.
-  { "\300", {
-    { Tokenizer::TYPE_SYMBOL, "\300", 0, 0, 1 },
-    { Tokenizer::TYPE_END   , ""    , 0, 1, 1 },
-  }},
-
   // Test all whitespace chars
   { "foo\n\t\r\v\fbar", {
     { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
@@ -741,7 +735,7 @@ TEST_F(TokenizerTest, ParseInteger) {
   EXPECT_EQ(0, ParseInteger("0x"));
 
   uint64 i;
-#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   // Test invalid integers that will never be tokenized as integers.
   EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
     "passed text that could not have been tokenized as an integer");
@@ -753,7 +747,7 @@ TEST_F(TokenizerTest, ParseInteger) {
     "passed text that could not have been tokenized as an integer");
   EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
     "passed text that could not have been tokenized as an integer");
-#endif  // PROTOBUF_HASDEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 
   // Test overflows.
   EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
@@ -796,7 +790,7 @@ TEST_F(TokenizerTest, ParseFloat) {
   EXPECT_EQ(     0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
   EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
 
-#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   // Test invalid integers that will never be tokenized as integers.
   EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
     "passed text that could not have been tokenized as a float");
@@ -804,7 +798,7 @@ TEST_F(TokenizerTest, ParseFloat) {
     "passed text that could not have been tokenized as a float");
   EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
     "passed text that could not have been tokenized as a float");
-#endif  // PROTOBUF_HASDEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 }
 
 TEST_F(TokenizerTest, ParseString) {
@@ -843,10 +837,10 @@ TEST_F(TokenizerTest, ParseString) {
   EXPECT_EQ("u0", output);
 
   // Test invalid strings that will never be tokenized as strings.
-#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
     "passed text that could not have been tokenized as a string");
-#endif  // PROTOBUF_HASDEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 }
 
 TEST_F(TokenizerTest, ParseStringAppend) {
@@ -883,7 +877,7 @@ ErrorCase kErrorCases[] = {
   { "'\\x' foo", true,
     "0:3: Expected hex digits for escape sequence.\n" },
   { "'foo", false,
-    "0:4: String literals cannot cross line boundaries.\n" },
+    "0:4: Unexpected end of string.\n" },
   { "'bar\nfoo", true,
     "0:4: String literals cannot cross line boundaries.\n" },
   { "'\\u01' foo", true,
@@ -951,6 +945,10 @@ ErrorCase kErrorCases[] = {
     "0:0: Invalid control characters encountered in text.\n" },
   { string("\0\0foo", 5), true,
     "0:0: Invalid control characters encountered in text.\n" },
+
+  // Check error from high order bits set
+  { "\300foo", true,
+    "0:0: Interpreting non ascii codepoint 192.\n" },
 };
 
 TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
index dad6ff14..4d53f29b 100644
--- a/src/google/protobuf/io/zero_copy_stream.cc
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -34,6 +34,7 @@
 
 #include <google/protobuf/io/zero_copy_stream.h>
 
+#include <google/protobuf/stubs/common.h>
 
 namespace google {
 namespace protobuf {
@@ -43,6 +44,14 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {}
 ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
 
 
+bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */,
+                                           int /* size */) {
+  GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. "
+                "Reaching here usually means a ZeroCopyOutputStream "
+                "implementation bug.";
+  return false;
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
index db5326f7..f8921228 100644
--- a/src/google/protobuf/io/zero_copy_stream.h
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -226,6 +226,16 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
   // Returns the total number of bytes written since this object was created.
   virtual int64 ByteCount() const = 0;
 
+  // Write a given chunk of data to the output.  Some output streams may
+  // implement this in a way that avoids copying. Check AllowsAliasing() before
+  // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is
+  // called on a stream that does not allow aliasing.
+  //
+  // NOTE: It is caller's responsibility to ensure that the chunk of memory
+  // remains live until all of the data has been consumed from the stream.
+  virtual bool WriteAliasedRaw(const void* data, int size);
+  virtual bool AllowsAliasing() const { return false; }
+
 
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
index 9fcbb622..7829a29f 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -413,7 +413,9 @@ int64 ConcatenatingInputStream::ByteCount() const {
 
 LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
                                          int64 limit)
-  : input_(input), limit_(limit) {}
+  : input_(input), limit_(limit) {
+  prior_bytes_read_ = input_->ByteCount();
+}
 
 LimitingInputStream::~LimitingInputStream() {
   // If we overshot the limit, back up.
@@ -457,9 +459,9 @@ bool LimitingInputStream::Skip(int count) {
 
 int64 LimitingInputStream::ByteCount() const {
   if (limit_ < 0) {
-    return input_->ByteCount() + limit_;
+    return input_->ByteCount() + limit_ - prior_bytes_read_;
   } else {
-    return input_->ByteCount();
+    return input_->ByteCount() - prior_bytes_read_;
   }
 }
 
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 9fedb005..83827097 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -344,6 +344,7 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
  private:
   ZeroCopyInputStream* input_;
   int64 limit_;  // Decreases as we go, becomes negative if we overshoot.
+  int64 prior_bytes_read_;  // Bytes read on underlying stream at construction
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
 };
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
index b3a71ce3..d186a98b 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -33,10 +33,12 @@
 //  Sanjay Ghemawat, Jeff Dean, and others.
 
 #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
-#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
 
 #include <algorithm>
+#include <limits>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
 namespace protobuf {
@@ -161,15 +163,23 @@ bool StringOutputStream::Next(void** data, int* size) {
     // without a memory allocation this way.
     STLStringResizeUninitialized(target_, target_->capacity());
   } else {
-    // Size has reached capacity, so double the size.  Also make sure
-    // that the new size is at least kMinimumSize.
+    // Size has reached capacity, try to double the size.
+    if (old_size > std::numeric_limits<int>::max() / 2) {
+      // Can not double the size otherwise it is going to cause integer
+      // overflow in the expression below: old_size * 2 ";
+      GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for "
+                 << "StringOutputStream.";
+      return false;
+    }
+    // Double the size, also make sure that the new size is at least
+    // kMinimumSize.
     STLStringResizeUninitialized(
       target_,
       max(old_size * 2,
           kMinimumSize + 0));  // "+ 0" works around GCC4 weirdness.
   }
 
-  *data = string_as_array(target_) + old_size;
+  *data = mutable_string_data(target_) + old_size;
   *size = target_->size() - old_size;
   return true;
 }
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
index 153f543e..b980143e 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -48,6 +48,7 @@
 #include <iosfwd>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 
 namespace google {
@@ -333,6 +334,19 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea
 
 // ===================================================================
 
+// Return a pointer to mutable characters underlying the given string.  The
+// return value is valid until the next time the string is resized.  We
+// trust the caller to treat the return value as an array of length s->size().
+inline char* mutable_string_data(string* s) {
+#ifdef LANG_CXX11
+  // This should be simpler & faster than string_as_array() because the latter
+  // is guaranteed to return NULL when *s is empty, so it has to check for that.
+  return &(*s)[0];
+#else
+  return string_as_array(s);
+#endif
+}
+
 }  // namespace io
 }  // namespace protobuf
 
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index 6f155df7..75eb2a43 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -560,9 +560,10 @@ TEST_F(IoTest, CompressionOptions) {
   // Some ad-hoc testing of compression options.
 
   string golden;
-  File::ReadFileToStringOrDie(
-    TestSourceDir() + "/google/protobuf/testdata/golden_message",
-    &golden);
+  GOOGLE_CHECK_OK(File::GetContents(
+      TestSourceDir() +
+          "/google/protobuf/testdata/golden_message",
+      &golden, true));
 
   GzipOutputStream::Options options;
   string gzip_compressed = Compress(golden, options);
@@ -923,6 +924,26 @@ TEST_F(IoTest, LimitingInputStream) {
   ReadStuff(&input);
 }
 
+// Checks that ByteCount works correctly for LimitingInputStreams where the
+// underlying stream has already been read.
+TEST_F(IoTest, LimitingInputStreamByteCount) {
+  const int kHalfBufferSize = 128;
+  const int kBufferSize = kHalfBufferSize * 2;
+  uint8 buffer[kBufferSize];
+
+  // Set up input. Only allow half to be read at once.
+  ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize);
+  const void* data;
+  int size;
+  EXPECT_TRUE(array_input.Next(&data, &size));
+  EXPECT_EQ(kHalfBufferSize, array_input.ByteCount());
+  // kHalfBufferSize - 1 to test limiting logic as well.
+  LimitingInputStream input(&array_input, kHalfBufferSize - 1);
+  EXPECT_EQ(0, input.ByteCount());
+  EXPECT_TRUE(input.Next(&data, &size));
+  EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount());
+}
+
 // Check that a zero-size array doesn't confuse the code.
 TEST(ZeroSizeArray, Input) {
   ArrayInputStream input(NULL, 0);
author	jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2014-07-18 00:47:59 +0000
committer	jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2014-07-18 00:47:59 +0000
commit	4de8f55113007fdc8e34107950e605fc0209d465 (patch)
tree	92b7da8757a7740d9e1f2d3ead233542947d8c8c /src/google/protobuf/io
parent	c5553a3d18f80132b9079c5504bc0aa1f7f950a0 (diff)