18 files changed, 397 insertions, 96 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index 93e1a22e..e3a34d0a 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -44,6 +44,7 @@
 #include <limits.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/arena.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/stubs/stl_util.h>
 
@@ -156,6 +157,11 @@ CodedInputStream::IncrementRecursionDepthAndPushLimit(int byte_limit) {
   return std::make_pair(PushLimit(byte_limit), --recursion_budget_);
 }
 
+CodedInputStream::Limit CodedInputStream::ReadLengthAndPushLimit() {
+  uint32 length;
+  return PushLimit(ReadVarint32(&length) ? length : 0);
+}
+
 bool CodedInputStream::DecrementRecursionDepthAndPopLimit(Limit limit) {
   bool result = ConsumedEntireMessage();
   PopLimit(limit);
@@ -164,6 +170,12 @@ bool CodedInputStream::DecrementRecursionDepthAndPopLimit(Limit limit) {
   return result;
 }
 
+bool CodedInputStream::CheckEntireMessageConsumedAndPopLimit(Limit limit) {
+  bool result = ConsumedEntireMessage();
+  PopLimit(limit);
+  return result;
+}
+
 int CodedInputStream::BytesUntilLimit() const {
   if (current_limit_ == INT_MAX) return -1;
   int current_position = CurrentPosition();
@@ -245,20 +257,7 @@ bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) {
 }
 
 bool CodedInputStream::ReadRaw(void* buffer, int size) {
-  int current_buffer_size;
-  while ((current_buffer_size = BufferSize()) < size) {
-    // Reading past end of buffer.  Copy what we have, then refresh.
-    memcpy(buffer, buffer_, current_buffer_size);
-    buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size;
-    size -= current_buffer_size;
-    Advance(current_buffer_size);
-    if (!Refresh()) return false;
-  }
-
-  memcpy(buffer, buffer_, size);
-  Advance(size);
-
-  return true;
+  return InternalReadRawInline(buffer, size);
 }
 
 bool CodedInputStream::ReadString(string* buffer, int size) {
@@ -336,17 +335,23 @@ bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) {
 
 namespace {
 
-inline const uint8* ReadVarint32FromArray(
-    const uint8* buffer, uint32* value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
-inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
+// Read a varint from the given buffer, write it to *value, and return a pair.
+// The first part of the pair is true iff the read was successful.  The second
+// part is buffer + (number of bytes read).  This function is always inlined,
+// so returning a pair is costless.
+GOOGLE_ATTRIBUTE_ALWAYS_INLINE ::std::pair<bool, const uint8*> ReadVarint32FromArray(
+    uint32 first_byte, const uint8* buffer,
+    uint32* value);
+inline ::std::pair<bool, const uint8*> ReadVarint32FromArray(
+    uint32 first_byte, const uint8* buffer, uint32* value) {
   // Fast path:  We have enough bytes left in the buffer to guarantee that
   // this read won't cross the end, so we can skip the checks.
+  GOOGLE_DCHECK_EQ(*buffer, first_byte);
+  GOOGLE_DCHECK_EQ(first_byte & 0x80, 0x80) << first_byte;
   const uint8* ptr = buffer;
   uint32 b;
-  uint32 result;
-
-  b = *(ptr++); result  = b      ; if (!(b & 0x80)) goto done;
-  result -= 0x80;
+  uint32 result = first_byte - 0x80;
+  ++ptr;  // We just processed the first byte.  Move on to the second.
   b = *(ptr++); result += b <<  7; if (!(b & 0x80)) goto done;
   result -= 0x80 << 7;
   b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
@@ -364,38 +369,42 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
 
   // We have overrun the maximum size of a varint (10 bytes).  Assume
   // the data is corrupt.
-  return NULL;
+  return std::make_pair(false, ptr);
 
  done:
   *value = result;
-  return ptr;
+  return std::make_pair(true, ptr);
 }
 
 }  // namespace
 
 bool CodedInputStream::ReadVarint32Slow(uint32* value) {
-  uint64 result;
   // Directly invoke ReadVarint64Fallback, since we already tried to optimize
   // for one-byte varints.
-  if (!ReadVarint64Fallback(&result)) return false;
-  *value = (uint32)result;
-  return true;
+  std::pair<uint64, bool> p = ReadVarint64Fallback();
+  *value = static_cast<uint32>(p.first);
+  return p.second;
 }
 
-bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
+int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_zero) {
   if (BufferSize() >= kMaxVarintBytes ||
       // Optimization:  We're also safe if the buffer is non-empty and it ends
       // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
-    const uint8* end = ReadVarint32FromArray(buffer_, value);
-    if (end == NULL) return false;
-    buffer_ = end;
-    return true;
+    GOOGLE_DCHECK_NE(first_byte_or_zero, 0)
+        << "Caller should provide us with *buffer_ when buffer is non-empty";
+    uint32 temp;
+    ::std::pair<bool, const uint8*> p =
+          ReadVarint32FromArray(first_byte_or_zero, buffer_, &temp);
+    if (!p.first) return -1;
+    buffer_ = p.second;
+    return temp;
   } else {
     // Really slow case: we will incur the cost of an extra function call here,
     // but moving this out of line reduces the size of this function, which
     // improves the common case. In micro benchmarks, this is worth about 10-15%
-    return ReadVarint32Slow(value);
+    uint32 temp;
+    return ReadVarint32Slow(&temp) ? static_cast<int64>(temp) : -1;
   }
 }
 
@@ -425,18 +434,24 @@ uint32 CodedInputStream::ReadTagSlow() {
   return static_cast<uint32>(result);
 }
 
-uint32 CodedInputStream::ReadTagFallback() {
+uint32 CodedInputStream::ReadTagFallback(uint32 first_byte_or_zero) {
   const int buf_size = BufferSize();
   if (buf_size >= kMaxVarintBytes ||
       // Optimization:  We're also safe if the buffer is non-empty and it ends
       // with a byte that would terminate a varint.
       (buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
+    GOOGLE_DCHECK_EQ(first_byte_or_zero, buffer_[0]);
+    if (first_byte_or_zero == 0) {
+      ++buffer_;
+      return 0;
+    }
     uint32 tag;
-    const uint8* end = ReadVarint32FromArray(buffer_, &tag);
-    if (end == NULL) {
+    ::std::pair<bool, const uint8*> p =
+        ReadVarint32FromArray(first_byte_or_zero, buffer_, &tag);
+    if (!p.first) {
       return 0;
     }
-    buffer_ = end;
+    buffer_ = p.second;
     return tag;
   } else {
     // We are commonly at a limit when attempting to read tags. Try to quickly
@@ -479,7 +494,7 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
   return true;
 }
 
-bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
+std::pair<uint64, bool> CodedInputStream::ReadVarint64Fallback() {
   if (BufferSize() >= kMaxVarintBytes ||
       // Optimization:  We're also safe if the buffer is non-empty and it ends
       // with a byte that would terminate a varint.
@@ -517,16 +532,18 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
 
     // We have overrun the maximum size of a varint (10 bytes).  The data
     // must be corrupt.
-    return false;
+    return std::make_pair(0, false);
 
    done:
     Advance(ptr - buffer_);
-    *value = (static_cast<uint64>(part0)      ) |
-             (static_cast<uint64>(part1) << 28) |
-             (static_cast<uint64>(part2) << 56);
-    return true;
+    return std::make_pair((static_cast<uint64>(part0)) |
+                              (static_cast<uint64>(part1) << 28) |
+                              (static_cast<uint64>(part2) << 56),
+                          true);
   } else {
-    return ReadVarint64Slow(value);
+    uint64 temp;
+    bool success = ReadVarint64Slow(&temp);
+    return std::make_pair(temp, success);
   }
 }
 
@@ -612,6 +629,24 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
   had_error_ = false;
 }
 
+CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output,
+                                     bool do_eager_refresh)
+  : output_(output),
+    buffer_(NULL),
+    buffer_size_(0),
+    total_bytes_(0),
+    had_error_(false),
+    aliasing_enabled_(false) {
+  if (do_eager_refresh) {
+    // Eagerly Refresh() so buffer space is immediately available.
+    Refresh();
+    // The Refresh() may have failed. If the client doesn't write any data,
+    // though, don't consider this an error. If the client does write data, then
+    // another Refresh() will be attempted and it will set the error once again.
+    had_error_ = false;
+  }
+}
+
 CodedOutputStream::~CodedOutputStream() {
   Trim();
 }
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index dea4b650..e3771003 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -109,14 +109,15 @@
 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
 
+#include <assert.h>
 #include <string>
 #include <utility>
 #ifdef _MSC_VER
-  #if defined(_M_IX86) && \
-      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+  // Assuming windows is always little-endian.
+  #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
     #define PROTOBUF_LITTLE_ENDIAN 1
   #endif
-  #if _MSC_VER >= 1300
+  #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
     // If MSVC has "/RTCc" set, it will complain about truncating casts at
     // runtime.  This file contains some intentional truncating casts.
     #pragma runtime_checks("c", off)
@@ -191,12 +192,16 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
 
   // Like GetDirectBufferPointer, but this method is inlined, and does not
   // attempt to Refresh() if the buffer is currently empty.
-  inline void GetDirectBufferPointerInline(const void** data,
-                                           int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data,
+                                                            int* size);
 
   // Read raw bytes, copying them into the given buffer.
   bool ReadRaw(void* buffer, int size);
 
+  // Like the above, with inlined optimizations. This should only be used
+  // by the protobuf implementation.
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadRawInline(void* buffer, int size);
+
   // Like ReadRaw, but reads into a string.
   //
   // Implementation Note:  ReadString() grows the string gradually as it
@@ -207,8 +212,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   bool ReadString(string* buffer, int size);
   // Like the above, with inlined optimizations. This should only be used
   // by the protobuf implementation.
-  inline bool InternalReadStringInline(string* buffer,
-                                       int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadStringInline(string* buffer,
+                                                        int size);
 
 
   // Read a 32-bit little-endian integer.
@@ -238,7 +243,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Always inline because this is only called in one place per parse loop
   // but it is called for every iteration of said loop, so it should be fast.
   // GCC doesn't want to inline this by default.
-  uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag();
 
   // This usually a faster alternative to ReadTag() when cutoff is a manifest
   // constant.  It does particularly well for cutoff >= 127.  The first part
@@ -248,8 +253,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
   // because that can arise in several ways, and for best performance we want
   // to avoid an extra "is tag == 0?" check here.)
-  inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
-      GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE std::pair<uint32, bool> ReadTagWithCutoff(
+      uint32 cutoff);
 
   // Usually returns true if calling ReadVarint32() now would produce the given
   // value.  Will always return false if ReadVarint32() would not return the
@@ -258,7 +263,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // parameter.
   // Always inline because this collapses to a small number of instructions
   // when given a constant parameter, but GCC doesn't want to inline by default.
-  bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
 
   // Like above, except this reads from the specified buffer. The caller is
   // responsible for ensuring that the buffer is large enough to read a varint
@@ -267,9 +272,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //
   // Returns a pointer beyond the expected tag if it was found, or NULL if it
   // was not.
-  static const uint8* ExpectTagFromArray(
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE static const uint8* ExpectTagFromArray(
       const uint8* buffer,
-      uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+      uint32 expected);
 
   // Usually returns true if no more bytes can be read.  Always returns false
   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
@@ -387,9 +392,14 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // under the limit, false if it has gone over.
   bool IncrementRecursionDepth();
 
-  // Decrements the recursion depth.
+  // Decrements the recursion depth if possible.
   void DecrementRecursionDepth();
 
+  // Decrements the recursion depth blindly.  This is faster than
+  // DecrementRecursionDepth().  It should be used only if all previous
+  // increments to recursion depth were successful.
+  void UnsafeDecrementRecursionDepth();
+
   // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
   // Using this can reduce code size and complexity in some cases.  The caller
   // is expected to check that the second part of the result is non-negative (to
@@ -398,15 +408,25 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
       int byte_limit);
 
+  // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
+  Limit ReadLengthAndPushLimit();
+
   // Helper that is equivalent to: {
   //  bool result = ConsumedEntireMessage();
   //  PopLimit(limit);
-  //  DecrementRecursionDepth();
+  //  UnsafeDecrementRecursionDepth();
   //  return result; }
   // Using this can reduce code size and complexity in some cases.
   // Do not use unless the current recursion depth is greater than zero.
   bool DecrementRecursionDepthAndPopLimit(Limit limit);
 
+  // Helper that is equivalent to: {
+  //  bool result = ConsumedEntireMessage();
+  //  PopLimit(limit);
+  //  return result; }
+  // Using this can reduce code size and complexity in some cases.
+  bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
+
   // Extension Registry ----------------------------------------------
   // ADVANCED USAGE:  99.9% of people can ignore this section.
   //
@@ -568,9 +588,13 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // optimization. The Slow method is yet another fallback when the buffer is
   // not large enough. Making the slow path out-of-line speeds up the common
   // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
-  // message crosses multiple buffers.
-  bool ReadVarint32Fallback(uint32* value);
-  bool ReadVarint64Fallback(uint64* value);
+  // message crosses multiple buffers.  Note: ReadVarint32Fallback() and
+  // ReadVarint64Fallback() are called frequently and generally not inlined, so
+  // they have been optimized to avoid "out" parameters.  The former returns -1
+  // if it fails and the uint32 it read otherwise.  The latter has a bool
+  // indicating success or failure as part of its return type.
+  int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
+  std::pair<uint64, bool> ReadVarint64Fallback();
   bool ReadVarint32Slow(uint32* value);
   bool ReadVarint64Slow(uint64* value);
   bool ReadLittleEndian32Fallback(uint32* value);
@@ -578,7 +602,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Fallback/slow methods for reading tags. These do not update last_tag_,
   // but will set legitimate_message_end_ if we are at the end of the input
   // stream.
-  uint32 ReadTagFallback();
+  uint32 ReadTagFallback(uint32 first_byte_or_zero);
   uint32 ReadTagSlow();
   bool ReadStringFallback(string* buffer, int size);
 
@@ -642,6 +666,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
  public:
   // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
   explicit CodedOutputStream(ZeroCopyOutputStream* output);
+  CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
 
   // Destroy the CodedOutputStream and position the underlying
   // ZeroCopyOutputStream immediately after the last byte written.
@@ -742,8 +767,8 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // but GCC by default doesn't want to inline this.
   void WriteTag(uint32 value);
   // Like WriteTag()  but writing directly to the target array.
-  static uint8* WriteTagToArray(
-      uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value,
+                                                        uint8* target);
 
   // Returns the number of bytes needed to encode the given value as a varint.
   static int VarintSize32(uint32 value);
@@ -807,8 +832,8 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
   // out-of-line, so it should just invoke this directly to avoid any extra
   // function call overhead.
-  static uint8* WriteVarint64ToArrayInline(
-      uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+  GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteVarint64ToArrayInline(
+      uint64 value, uint8* target);
 
   static int VarintSize32Fallback(uint32 value);
 };
@@ -818,13 +843,18 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
 // methods optimize for that case.
 
 inline bool CodedInputStream::ReadVarint32(uint32* value) {
-  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
-    *value = *buffer_;
-    Advance(1);
-    return true;
-  } else {
-    return ReadVarint32Fallback(value);
+  uint32 v = 0;
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+    v = *buffer_;
+    if (v < 0x80) {
+      *value = v;
+      Advance(1);
+      return true;
+    }
   }
+  int64 result = ReadVarint32Fallback(v);
+  *value = static_cast<uint32>(result);
+  return result >= 0;
 }
 
 inline bool CodedInputStream::ReadVarint64(uint64* value) {
@@ -832,9 +862,10 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) {
     *value = *buffer_;
     Advance(1);
     return true;
-  } else {
-    return ReadVarint64Fallback(value);
   }
+  std::pair<uint64, bool> p = ReadVarint64Fallback();
+  *value = p.first;
+  return p.second;
 }
 
 // static
@@ -903,14 +934,17 @@ inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
 }
 
 inline uint32 CodedInputStream::ReadTag() {
-  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
-    last_tag_ = buffer_[0];
-    Advance(1);
-    return last_tag_;
-  } else {
-    last_tag_ = ReadTagFallback();
-    return last_tag_;
+  uint32 v = 0;
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+    v = *buffer_;
+    if (v < 0x80) {
+      last_tag_ = v;
+      Advance(1);
+      return v;
+    }
   }
+  last_tag_ = ReadTagFallback(v);
+  return last_tag_;
 }
 
 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
@@ -918,10 +952,12 @@ inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
   // In performance-sensitive code we can expect cutoff to be a compile-time
   // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
   // compile time.
+  uint32 first_byte_or_zero = 0;
   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
     // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
     // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
     // is large enough then is it better to check for the two-byte case first?
+    first_byte_or_zero = buffer_[0];
     if (static_cast<int8>(buffer_[0]) > 0) {
       const uint32 kMax1ByteVarint = 0x7f;
       uint32 tag = last_tag_ = buffer_[0];
@@ -948,7 +984,7 @@ inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
     }
   }
   // Slow path
-  last_tag_ = ReadTagFallback();
+  last_tag_ = ReadTagFallback(first_byte_or_zero);
   return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
 }
 
@@ -1001,7 +1037,7 @@ inline const uint8* CodedInputStream::ExpectTagFromArray(
 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
                                                            int* size) {
   *data = buffer_;
-  *size = buffer_end_ - buffer_;
+  *size = static_cast<int>(buffer_end_ - buffer_);
 }
 
 inline bool CodedInputStream::ExpectAtEnd() {
@@ -1177,6 +1213,11 @@ inline void CodedInputStream::DecrementRecursionDepth() {
   if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
 }
 
+inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
+  assert(recursion_budget_ < recursion_limit_);
+  ++recursion_budget_;
+}
+
 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
                                                    MessageFactory* factory) {
   extension_pool_ = pool;
@@ -1192,7 +1233,7 @@ inline MessageFactory* CodedInputStream::GetExtensionFactory() {
 }
 
 inline int CodedInputStream::BufferSize() const {
-  return buffer_end_ - buffer_;
+  return static_cast<int>(buffer_end_ - buffer_);
 }
 
 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
@@ -1245,9 +1286,9 @@ inline bool CodedInputStream::IsFlat() const {
 }  // namespace protobuf
 
 
-#if defined(_MSC_VER) && _MSC_VER >= 1300
+#if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
   #pragma runtime_checks("c", restore)
-#endif  // _MSC_VER
+#endif  // _MSC_VER && !defined(__INTEL_COMPILER)
 
 }  // namespace google
 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
index cd8d1746..d95b06e0 100644
--- a/src/google/protobuf/io/coded_stream_inl.h
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -36,6 +36,7 @@
 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
 
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
@@ -66,6 +67,23 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer,
   return ReadStringFallback(buffer, size);
 }
 
+inline bool CodedInputStream::InternalReadRawInline(void* buffer, int size) {
+  int current_buffer_size;
+  while ((current_buffer_size = BufferSize()) < size) {
+    // Reading past end of buffer.  Copy what we have, then refresh.
+    memcpy(buffer, buffer_, current_buffer_size);
+    buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size;
+    size -= current_buffer_size;
+    Advance(current_buffer_size);
+    if (!Refresh()) return false;
+  }
+
+  memcpy(buffer, buffer_, size);
+  Advance(size);
+
+  return true;
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index 02d5bad3..d1782e39 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -34,6 +34,10 @@
 //
 // This file contains tests and benchmarks.
 
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
 #include <vector>
 
 #include <google/protobuf/io/coded_stream.h>
@@ -41,6 +45,8 @@
 #include <limits.h>
 
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
 #include <google/protobuf/testing/googletest.h>
 #include <gtest/gtest.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -677,7 +683,7 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) {
 }
 
 TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
-  scoped_array<uint8> buffer(new uint8[8]);
+  google::protobuf::scoped_array<uint8> buffer(new uint8[8]);
   CodedInputStream coded_input(buffer.get(), 8);
   string str;
   EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
index 77d485cf..9c621b6a 100644
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -33,12 +33,12 @@
 // This file contains the implementation of classes GzipInputStream and
 // GzipOutputStream.
 
-#include "config.h"
 
 #if HAVE_ZLIB
 #include <google/protobuf/io/gzip_stream.h>
 
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 
 namespace google {
 namespace protobuf {
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
index e621ba1d..7d886506 100644
--- a/src/google/protobuf/io/printer.cc
+++ b/src/google/protobuf/io/printer.cc
@@ -34,6 +34,7 @@
 
 #include <google/protobuf/io/printer.h>
 #include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 
 namespace google {
@@ -155,6 +156,78 @@ void Printer::Print(const char* text,
   Print(vars, text);
 }
 
+void Printer::Print(const char* text,
+                    const char* variable1, const string& value1,
+                    const char* variable2, const string& value2,
+                    const char* variable3, const string& value3,
+                    const char* variable4, const string& value4,
+                    const char* variable5, const string& value5) {
+  map<string, string> vars;
+  vars[variable1] = value1;
+  vars[variable2] = value2;
+  vars[variable3] = value3;
+  vars[variable4] = value4;
+  vars[variable5] = value5;
+  Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+                    const char* variable1, const string& value1,
+                    const char* variable2, const string& value2,
+                    const char* variable3, const string& value3,
+                    const char* variable4, const string& value4,
+                    const char* variable5, const string& value5,
+                    const char* variable6, const string& value6) {
+  map<string, string> vars;
+  vars[variable1] = value1;
+  vars[variable2] = value2;
+  vars[variable3] = value3;
+  vars[variable4] = value4;
+  vars[variable5] = value5;
+  vars[variable6] = value6;
+  Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+                    const char* variable1, const string& value1,
+                    const char* variable2, const string& value2,
+                    const char* variable3, const string& value3,
+                    const char* variable4, const string& value4,
+                    const char* variable5, const string& value5,
+                    const char* variable6, const string& value6,
+                    const char* variable7, const string& value7) {
+  map<string, string> vars;
+  vars[variable1] = value1;
+  vars[variable2] = value2;
+  vars[variable3] = value3;
+  vars[variable4] = value4;
+  vars[variable5] = value5;
+  vars[variable6] = value6;
+  vars[variable7] = value7;
+  Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+                    const char* variable1, const string& value1,
+                    const char* variable2, const string& value2,
+                    const char* variable3, const string& value3,
+                    const char* variable4, const string& value4,
+                    const char* variable5, const string& value5,
+                    const char* variable6, const string& value6,
+                    const char* variable7, const string& value7,
+                    const char* variable8, const string& value8) {
+  map<string, string> vars;
+  vars[variable1] = value1;
+  vars[variable2] = value2;
+  vars[variable3] = value3;
+  vars[variable4] = value4;
+  vars[variable5] = value5;
+  vars[variable6] = value6;
+  vars[variable7] = value7;
+  vars[variable8] = value8;
+  Print(vars, text);
+}
+
 void Printer::Indent() {
   indent_ += "  ";
 }
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
index 92ce3409..f1490bbe 100644
--- a/src/google/protobuf/io/printer.h
+++ b/src/google/protobuf/io/printer.h
@@ -91,8 +91,36 @@ class LIBPROTOBUF_EXPORT Printer {
                                const char* variable2, const string& value2,
                                const char* variable3, const string& value3,
                                const char* variable4, const string& value4);
-  // TODO(kenton):  Overloaded versions with more variables?  Three seems
-  //   to be enough.
+  // Like the first Print(), except the substitutions are given as parameters.
+  void Print(const char* text, const char* variable1, const string& value1,
+                               const char* variable2, const string& value2,
+                               const char* variable3, const string& value3,
+                               const char* variable4, const string& value4,
+                               const char* variable5, const string& value5);
+  // Like the first Print(), except the substitutions are given as parameters.
+  void Print(const char* text, const char* variable1, const string& value1,
+                               const char* variable2, const string& value2,
+                               const char* variable3, const string& value3,
+                               const char* variable4, const string& value4,
+                               const char* variable5, const string& value5,
+                               const char* variable6, const string& value6);
+  // Like the first Print(), except the substitutions are given as parameters.
+  void Print(const char* text, const char* variable1, const string& value1,
+                               const char* variable2, const string& value2,
+                               const char* variable3, const string& value3,
+                               const char* variable4, const string& value4,
+                               const char* variable5, const string& value5,
+                               const char* variable6, const string& value6,
+                               const char* variable7, const string& value7);
+  // Like the first Print(), except the substitutions are given as parameters.
+  void Print(const char* text, const char* variable1, const string& value1,
+                               const char* variable2, const string& value2,
+                               const char* variable3, const string& value3,
+                               const char* variable4, const string& value4,
+                               const char* variable5, const string& value5,
+                               const char* variable6, const string& value6,
+                               const char* variable7, const string& value7,
+                               const char* variable8, const string& value8);
 
   // Indent text by two spaces.  After calling Indent(), two spaces will be
   // inserted at the beginning of each line of text.  Indent() may be called
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index 1331a8d9..258dd986 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -37,6 +37,7 @@
 #include <google/protobuf/io/printer.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/testing/googletest.h>
 #include <gtest/gtest.h>
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
index 56973439..a90bb9a3 100644
--- a/src/google/protobuf/io/strtod.cc
+++ b/src/google/protobuf/io/strtod.cc
@@ -32,8 +32,10 @@
 
 #include <cstdio>
 #include <cstring>
+#include <limits>
 #include <string>
 
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 
 namespace google {
@@ -108,6 +110,16 @@ double NoLocaleStrtod(const char* text, char** original_endptr) {
   return result;
 }
 
+float SafeDoubleToFloat(double value) {
+  if (value > std::numeric_limits<float>::max()) {
+    return std::numeric_limits<float>::infinity();
+  } else if (value < -std::numeric_limits<float>::max()) {
+    return -std::numeric_limits<float>::infinity();
+  } else {
+    return static_cast<float>(value);
+  }
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
index c2efc8d3..f56e41c8 100644
--- a/src/google/protobuf/io/strtod.h
+++ b/src/google/protobuf/io/strtod.h
@@ -43,6 +43,11 @@ namespace io {
 // uses a dot as the decimal separator.
 double NoLocaleStrtod(const char* str, char** endptr);
 
+// Casts a double value to a float value. If the value is outside of the
+// representable range of float, it will be converted to positive or negative
+// infinity.
+float SafeDoubleToFloat(double value);
+
 }  // namespace io
 }  // namespace protobuf
 
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index ef2de300..3d57707c 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -90,6 +90,7 @@
 
 #include <google/protobuf/io/tokenizer.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/stringprintf.h>
 #include <google/protobuf/io/strtod.h>
 #include <google/protobuf/io/zero_copy_stream.h>
@@ -374,7 +375,7 @@ void Tokenizer::ConsumeString(char delimiter) {
           // Possibly followed by two more octal digits, but these will
           // just be consumed by the main loop anyway so we don't need
           // to do so explicitly here.
-        } else if (TryConsume('x') || TryConsume('X')) {
+        } else if (TryConsume('x')) {
           if (!TryConsumeOne<HexDigit>()) {
             AddError("Expected hex digits for escape sequence.");
           }
@@ -762,6 +763,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
                              next_leading_comments);
 
   if (current_.type == TYPE_START) {
+    // Ignore unicode byte order mark(BOM) if it appears at the file
+    // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
+    if (TryConsume((char)0xEF)) {
+      if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
+        AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
+                 "Only UTF-8 is accepted for proto file.");
+        return false;
+      }
+    }
     collector.DetachFromPrev();
   } else {
     // A comment appearing on the same line must be attached to the previous
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 98576f56..49885eda 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -40,6 +40,7 @@
 #include <string>
 #include <vector>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 
 namespace google {
 namespace protobuf {
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index de096fb9..20d50a2c 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -41,6 +41,7 @@
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/stubs/substitute.h>
 #include <google/protobuf/testing/googletest.h>
@@ -874,6 +875,8 @@ ErrorCase kErrorCases[] = {
   // String errors.
   { "'\\l' foo", true,
     "0:2: Invalid escape sequence in string literal.\n" },
+  { "'\\X' foo", true,
+    "0:2: Invalid escape sequence in string literal.\n" },
   { "'\\x' foo", true,
     "0:3: Expected hex digits for escape sequence.\n" },
   { "'foo", false,
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
index f77c768f..186de001 100644
--- a/src/google/protobuf/io/zero_copy_stream.cc
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -34,6 +34,7 @@
 
 #include <google/protobuf/io/zero_copy_stream.h>
 
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
 
 namespace google {
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
index f7901b27..7ec2b5da 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -46,6 +46,7 @@
 
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/stl_util.h>
 
 
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
index 97b73b88..083beca4 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -39,6 +39,7 @@
 
 #include <google/protobuf/stubs/casts.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
@@ -156,6 +157,7 @@ StringOutputStream::~StringOutputStream() {
 }
 
 bool StringOutputStream::Next(void** data, int* size) {
+  GOOGLE_CHECK_NE(NULL, target_);
   int old_size = target_->size();
 
   // Grow the string.
@@ -187,14 +189,44 @@ bool StringOutputStream::Next(void** data, int* size) {
 
 void StringOutputStream::BackUp(int count) {
   GOOGLE_CHECK_GE(count, 0);
+  GOOGLE_CHECK_NE(NULL, target_);
   GOOGLE_CHECK_LE(count, target_->size());
   target_->resize(target_->size() - count);
 }
 
 int64 StringOutputStream::ByteCount() const {
+  GOOGLE_CHECK_NE(NULL, target_);
   return target_->size();
 }
 
+void StringOutputStream::SetString(string* target) {
+  target_ = target;
+}
+
+// ===================================================================
+
+LazyStringOutputStream::LazyStringOutputStream(
+    ResultCallback<string*>* callback)
+    : StringOutputStream(NULL),
+      callback_(GOOGLE_CHECK_NOTNULL(callback)),
+      string_is_set_(false) {
+}
+
+LazyStringOutputStream::~LazyStringOutputStream() {
+}
+
+bool LazyStringOutputStream::Next(void** data, int* size) {
+  if (!string_is_set_) {
+    SetString(callback_->Run());
+    string_is_set_ = true;
+  }
+  return StringOutputStream::Next(data, size);
+}
+
+int64 LazyStringOutputStream::ByteCount() const {
+  return string_is_set_ ? StringOutputStream::ByteCount() : 0;
+}
+
 // ===================================================================
 
 CopyingInputStream::~CopyingInputStream() {}
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
index 4360b18f..1c397dea 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -44,10 +44,15 @@
 #ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__
 #define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__
 
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
 #include <string>
 #include <iosfwd>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
 #include <google/protobuf/stubs/stl_util.h>
 
 
@@ -143,6 +148,9 @@ class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream {
   void BackUp(int count);
   int64 ByteCount() const;
 
+ protected:
+  void SetString(string* target);
+
  private:
   static const int kMinimumSize = 16;
 
@@ -151,6 +159,27 @@ class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream {
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream);
 };
 
+// LazyStringOutputStream is a StringOutputStream with lazy acquisition of
+// the output string from a callback. The string is owned externally, and not
+// deleted in the stream destructor.
+class LIBPROTOBUF_EXPORT LazyStringOutputStream : public StringOutputStream {
+ public:
+  // Callback should be permanent (non-self-deleting). Ownership is transferred
+  // to the LazyStringOutputStream.
+  explicit LazyStringOutputStream(ResultCallback<string*>* callback);
+  ~LazyStringOutputStream();
+
+  // implements ZeroCopyOutputStream, overriding StringOutputStream -----------
+  bool Next(void** data, int* size);
+  int64 ByteCount() const;
+
+ private:
+  const scoped_ptr<ResultCallback<string*> > callback_;
+  bool string_is_set_;
+
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LazyStringOutputStream);
+};
+
 // Note:  There is no StringInputStream.  Instead, just create an
 // ArrayInputStream as follows:
 //   ArrayInputStream input(str.data(), str.size());
@@ -234,7 +263,7 @@ class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream
 
   // Data is read into this buffer.  It may be NULL if no buffer is currently
   // in use.  Otherwise, it points to an array of size buffer_size_.
-  scoped_array<uint8> buffer_;
+  google::protobuf::scoped_array<uint8> buffer_;
   const int buffer_size_;
 
   // Number of valid bytes currently in the buffer (i.e. the size last
@@ -323,7 +352,7 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea
 
   // Data is written from this buffer.  It may be NULL if no buffer is
   // currently in use.  Otherwise, it points to an array of size buffer_size_.
-  scoped_array<uint8> buffer_;
+  google::protobuf::scoped_array<uint8> buffer_;
   const int buffer_size_;
 
   // Number of valid bytes currently in the buffer (i.e. the size last
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index dd3d1285..8c7358c1 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -46,7 +46,6 @@
 //   "parametized tests" so that one set of tests can be used on all the
 //   implementations.
 
-#include "config.h"
 
 #ifdef _MSC_VER
 #include <io.h>
@@ -58,6 +57,10 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
 #include <sstream>
 
 #include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -68,6 +71,8 @@
 #endif
 
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
 #include <google/protobuf/testing/googletest.h>
 #include <google/protobuf/testing/file.h>
 #include <gtest/gtest.h>
@@ -196,7 +201,7 @@ void IoTest::WriteString(ZeroCopyOutputStream* output, const string& str) {
 }
 
 void IoTest::ReadString(ZeroCopyInputStream* input, const string& str) {
-  scoped_array<char> buffer(new char[str.size() + 1]);
+  google::protobuf::scoped_array<char> buffer(new char[str.size() + 1]);
   buffer[str.size()] = '\0';
   EXPECT_EQ(ReadFromInput(input, buffer.get(), str.size()), str.size());
   EXPECT_STREQ(str.c_str(), buffer.get());