Massive roll-up of changes. See CHANGES.txt.

author: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-12-18 02:11:36 +0000
committer: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-12-18 02:11:36 +0000
commit: fccb146e3fe437b0df1e9c50d4b8e1080ddb4bd9 (patch)
tree: 9f2d9fe0267d96a54e541377ffeada3d0bff0d1d /src/google/protobuf/io
parent: d5cf7b55a6a1f959d1646785f63ca2b62da78079 (diff)
12 files changed, 697 insertions, 257 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index e17a4775..6a91a13d 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -38,9 +38,9 @@
 // will not cross the end of the buffer, since we can avoid a lot
 // of branching in this case.
 
-#include <stack>
+#include <google/protobuf/io/coded_stream_inl.h>
+#include <algorithm>
 #include <limits.h>
-#include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/stubs/stl_util-inl.h>
@@ -52,11 +52,6 @@ namespace io {
 
 namespace {
 
-static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
-
-static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
-static const int kDefaultRecursionLimit = 64;
-
 static const int kMaxVarintBytes = 10;
 static const int kMaxVarint32Bytes = 5;
 
@@ -65,72 +60,28 @@ static const int kMaxVarint32Bytes = 5;
 
 // CodedInputStream ==================================================
 
-CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
-  : input_(input),
-    buffer_(NULL),
-    buffer_size_(0),
-    total_bytes_read_(0),
-    overflow_bytes_(0),
-    last_tag_(0),
-    legitimate_message_end_(false),
-    aliasing_enabled_(false),
-    current_limit_(INT_MAX),
-    buffer_size_after_limit_(0),
-    total_bytes_limit_(kDefaultTotalBytesLimit),
-    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
-    recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit) {
-  // Eagerly Refresh() so buffer space is immediately available.
-  Refresh();
-}
-
-CodedInputStream::CodedInputStream(const uint8* buffer, int size)
-  : input_(NULL),
-    buffer_(buffer),
-    buffer_size_(size),
-    total_bytes_read_(size),
-    overflow_bytes_(0),
-    last_tag_(0),
-    legitimate_message_end_(false),
-    aliasing_enabled_(false),
-    current_limit_(size),
-    buffer_size_after_limit_(0),
-    total_bytes_limit_(kDefaultTotalBytesLimit),
-    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
-    recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit) {
-  // Note that setting current_limit_ == size is important to prevent some
-  // code paths from trying to access input_ and segfaulting.
-}
-
-CodedInputStream::~CodedInputStream() {
-  if (input_ != NULL) {
-    BackUpInputToCurrentPosition();
-  }
-}
-
 
 void CodedInputStream::BackUpInputToCurrentPosition() {
-  int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_;
+  int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
   if (backup_bytes > 0) {
     input_->BackUp(backup_bytes);
 
     // total_bytes_read_ doesn't include overflow_bytes_.
-    total_bytes_read_ -= buffer_size_ + buffer_size_after_limit_;
-    buffer_size_ = 0;
+    total_bytes_read_ -= BufferSize() + buffer_size_after_limit_;
+    buffer_end_ = buffer_;
     buffer_size_after_limit_ = 0;
     overflow_bytes_ = 0;
   }
 }
 
 inline void CodedInputStream::RecomputeBufferLimits() {
-  buffer_size_ += buffer_size_after_limit_;
+  buffer_end_ += buffer_size_after_limit_;
   int closest_limit = min(current_limit_, total_bytes_limit_);
   if (closest_limit < total_bytes_read_) {
     // The limit position is in the current buffer.  We must adjust
     // the buffer size accordingly.
     buffer_size_after_limit_ = total_bytes_read_ - closest_limit;
-    buffer_size_ -= buffer_size_after_limit_;
+    buffer_end_ -= buffer_size_after_limit_;
   } else {
     buffer_size_after_limit_ = 0;
   }
@@ -139,7 +90,7 @@ inline void CodedInputStream::RecomputeBufferLimits() {
 CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
   // Current position relative to the beginning of the stream.
   int current_position = total_bytes_read_ -
-      (buffer_size_ + buffer_size_after_limit_);
+      (BufferSize() + buffer_size_after_limit_);
 
   Limit old_limit = current_limit_;
 
@@ -176,7 +127,7 @@ void CodedInputStream::PopLimit(Limit limit) {
 int CodedInputStream::BytesUntilLimit() {
   if (current_limit_ == INT_MAX) return -1;
   int current_position = total_bytes_read_ -
-      (buffer_size_ + buffer_size_after_limit_);
+      (BufferSize() + buffer_size_after_limit_);
 
   return current_limit_ - current_position;
 }
@@ -186,7 +137,7 @@ void CodedInputStream::SetTotalBytesLimit(
   // Make sure the limit isn't already past, since this could confuse other
   // code.
   int current_position = total_bytes_read_ -
-      (buffer_size_ + buffer_size_after_limit_);
+      (BufferSize() + buffer_size_after_limit_);
   total_bytes_limit_ = max(current_position, total_bytes_limit);
   total_bytes_warning_threshold_ = warning_threshold;
   RecomputeBufferLimits();
@@ -203,7 +154,9 @@ void CodedInputStream::PrintTotalBytesLimitError() {
 bool CodedInputStream::Skip(int count) {
   if (count < 0) return false;  // security: count is often user-supplied
 
-  if (count <= buffer_size_) {
+  const int original_buffer_size = BufferSize();
+
+  if (count <= original_buffer_size) {
     // Just skipping within the current buffer.  Easy.
     Advance(count);
     return true;
@@ -211,13 +164,13 @@ bool CodedInputStream::Skip(int count) {
 
   if (buffer_size_after_limit_ > 0) {
     // We hit a limit inside this buffer.  Advance to the limit and fail.
-    Advance(buffer_size_);
+    Advance(original_buffer_size);
     return false;
   }
 
-  count -= buffer_size_;
+  count -= original_buffer_size;
   buffer_ = NULL;
-  buffer_size_ = 0;
+  buffer_end_ = buffer_;
 
   // Make sure this skip doesn't try to skip past the current limit.
   int closest_limit = min(current_limit_, total_bytes_limit_);
@@ -236,20 +189,21 @@ bool CodedInputStream::Skip(int count) {
 }
 
 bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) {
-  if (buffer_size_ == 0 && !Refresh()) return false;
+  if (BufferSize() == 0 && !Refresh()) return false;
 
   *data = buffer_;
-  *size = buffer_size_;
+  *size = BufferSize();
   return true;
 }
 
 bool CodedInputStream::ReadRaw(void* buffer, int size) {
-  while (buffer_size_ < size) {
+  int current_buffer_size;
+  while ((current_buffer_size = BufferSize()) < size) {
     // Reading past end of buffer.  Copy what we have, then refresh.
-    memcpy(buffer, buffer_, buffer_size_);
-    buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_;
-    size -= buffer_size_;
-    Advance(buffer_size_);
+    memcpy(buffer, buffer_, current_buffer_size);
+    buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size;
+    size -= current_buffer_size;
+    Advance(current_buffer_size);
     if (!Refresh()) return false;
   }
 
@@ -261,27 +215,25 @@ bool CodedInputStream::ReadRaw(void* buffer, int size) {
 
 bool CodedInputStream::ReadString(string* buffer, int size) {
   if (size < 0) return false;  // security: size is often user-supplied
+  return InternalReadStringInline(buffer, size);
+}
 
+bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
   if (!buffer->empty()) {
     buffer->clear();
   }
 
-  if (size < buffer_size_) {
-    STLStringResizeUninitialized(buffer, size);
-    memcpy((uint8*)buffer->data(), buffer_, size);
-    Advance(size);
-    return true;
-  }
-
-  while (buffer_size_ < size) {
+  int current_buffer_size;
+  while ((current_buffer_size = BufferSize()) < size) {
     // Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
-    if (buffer_size_ != 0) {
+    if (current_buffer_size != 0) {
       // Note:  string1.append(string2) is O(string2.size()) (as opposed to
       //   O(string1.size() + string2.size()), which would be bad).
-      buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_);
+      buffer->append(reinterpret_cast<const char*>(buffer_),
+                     current_buffer_size);
     }
-    size -= buffer_size_;
-    Advance(buffer_size_);
+    size -= current_buffer_size;
+    Advance(current_buffer_size);
     if (!Refresh()) return false;
   }
 
@@ -292,11 +244,11 @@ bool CodedInputStream::ReadString(string* buffer, int size) {
 }
 
 
-bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+bool CodedInputStream::ReadLittleEndian32Fallback(uint32* value) {
   uint8 bytes[sizeof(*value)];
 
   const uint8* ptr;
-  if (buffer_size_ >= sizeof(*value)) {
+  if (BufferSize() >= sizeof(*value)) {
     // Fast path:  Enough bytes in the buffer to read directly.
     ptr = buffer_;
     Advance(sizeof(*value));
@@ -305,19 +257,15 @@ bool CodedInputStream::ReadLittleEndian32(uint32* value) {
     if (!ReadRaw(bytes, sizeof(*value))) return false;
     ptr = bytes;
   }
-
-  *value = (static_cast<uint32>(ptr[0])      ) |
-           (static_cast<uint32>(ptr[1]) <<  8) |
-           (static_cast<uint32>(ptr[2]) << 16) |
-           (static_cast<uint32>(ptr[3]) << 24);
+  ReadLittleEndian32FromArray(ptr, value);
   return true;
 }
 
-bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) {
   uint8 bytes[sizeof(*value)];
 
   const uint8* ptr;
-  if (buffer_size_ >= sizeof(*value)) {
+  if (BufferSize() >= sizeof(*value)) {
     // Fast path:  Enough bytes in the buffer to read directly.
     ptr = buffer_;
     Advance(sizeof(*value));
@@ -326,99 +274,152 @@ bool CodedInputStream::ReadLittleEndian64(uint64* value) {
     if (!ReadRaw(bytes, sizeof(*value))) return false;
     ptr = bytes;
   }
+  ReadLittleEndian64FromArray(ptr, value);
+  return true;
+}
+
+namespace {
+
+inline const uint8* ReadVarint32FromArray(
+    const uint8* buffer, uint32* value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
+  // No bounds checks here: callers must ensure kMaxVarintBytes are readable at
+  // |buffer| or that a varint terminates before the readable bytes run out.
+  const uint8* ptr = buffer;
+  uint32 b;
+  uint32 result;
+
+  b = *(ptr++); result  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result |=  b         << 28; if (!(b & 0x80)) goto done;
+
+  // If the input is larger than 32 bits, we still need to read it all
+  // and discard the high-order bits.
+  for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
+    b = *(ptr++); if (!(b & 0x80)) goto done;
+  }
+
+  // We have overrun the maximum size of a varint (10 bytes).  Assume
+  // the data is corrupt; *value is left unmodified.
+  return NULL;
+
+ done:
+  *value = result;
+  return ptr;
+}
+
+}  // namespace
 
-  uint32 part0 = (static_cast<uint32>(ptr[0])      ) |
-                 (static_cast<uint32>(ptr[1]) <<  8) |
-                 (static_cast<uint32>(ptr[2]) << 16) |
-                 (static_cast<uint32>(ptr[3]) << 24);
-  uint32 part1 = (static_cast<uint32>(ptr[4])      ) |
-                 (static_cast<uint32>(ptr[5]) <<  8) |
-                 (static_cast<uint32>(ptr[6]) << 16) |
-                 (static_cast<uint32>(ptr[7]) << 24);
-  *value = static_cast<uint64>(part0) |
-          (static_cast<uint64>(part1) << 32);
+bool CodedInputStream::ReadVarint32Slow(uint32* value) {
+  uint64 result;
+  // Skip the inline one-byte check (we already tried it) and go straight
+  // to ReadVarint64Fallback; the result is truncated to 32 bits.
+  if (!ReadVarint64Fallback(&result)) return false;
+  *value = static_cast<uint32>(result);
   return true;
 }
 
 bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
-  if (buffer_size_ >= kMaxVarintBytes ||
+  if (BufferSize() >= kMaxVarintBytes ||
       // Optimization:  If the varint ends at exactly the end of the buffer,
       // we can detect that and still use the fast path.
-      (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
-    // Fast path:  We have enough bytes left in the buffer to guarantee that
-    // this read won't cross the end, so we can skip the checks.
-    const uint8* ptr = buffer_;
-    uint32 b;
-    uint32 result;
-
-    b = *(ptr++); result  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); result |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-    b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-    b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-    b = *(ptr++); result |=  b         << 28; if (!(b & 0x80)) goto done;
-
-    // If the input is larger than 32 bits, we still need to read it all
-    // and discard the high-order bits.
-    for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
-      b = *(ptr++); if (!(b & 0x80)) goto done;
-    }
-
-    // We have overrun the maximum size of a varint (10 bytes).  Assume
-    // the data is corrupt.
-    return false;
-
-   done:
-    Advance(ptr - buffer_);
-    *value = result;
+      (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+    const uint8* end = ReadVarint32FromArray(buffer_, value);
+    if (end == NULL) return false;
+    buffer_ = end;
     return true;
-
   } else {
-    // Optimization:  If we're at a limit, detect that quickly.  (This is
-    // common when reading tags.)
-    while (buffer_size_ == 0) {
-      // Detect cases where we definitely hit a byte limit without calling
-      // Refresh().
-      if (// If we hit a limit, buffer_size_after_limit_ will be non-zero.
-          buffer_size_after_limit_ > 0 &&
-          // Make sure that the limit we hit is not total_bytes_limit_, since
-          // in that case we still need to call Refresh() so that it prints an
-          // error.
-          total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
-        // We hit a byte limit.
-        legitimate_message_end_ = true;
-        return false;
-      }
+    // Really slow case: we will incur the cost of an extra function call here,
+    // but moving this out of line reduces the size of this function, which
+    // improves the common case. In micro benchmarks, this is worth about 10-15%
+    return ReadVarint32Slow(value);
+  }
+}
 
-      // Call refresh.
-      if (!Refresh()) {
-        // Refresh failed.  Make sure that it failed due to EOF, not because
-        // we hit total_bytes_limit_, which, unlike normal limits, is not a
-        // valid place to end a message.
-        int current_position = total_bytes_read_ - buffer_size_after_limit_;
-        if (current_position >= total_bytes_limit_) {
-          // Hit total_bytes_limit_.  But if we also hit the normal limit,
-          // we're still OK.
-          legitimate_message_end_ = current_limit_ == total_bytes_limit_;
-        } else {
-          legitimate_message_end_ = true;
-        }
-        return false;
+uint32 CodedInputStream::ReadTagSlow() {
+  if (buffer_ == buffer_end_) {
+    // Call refresh.
+    if (!Refresh()) {
+      // Refresh failed.  Make sure that it failed due to EOF, not because
+      // we hit total_bytes_limit_, which, unlike normal limits, is not a
+      // valid place to end a message.
+      int current_position = total_bytes_read_ - buffer_size_after_limit_;
+      if (current_position >= total_bytes_limit_) {
+        // Hit total_bytes_limit_.  But if we also hit the normal limit,
+        // we're still OK.
+        legitimate_message_end_ = current_limit_ == total_bytes_limit_;
+      } else {
+        legitimate_message_end_ = true;
       }
+      return 0;
     }
+  }
 
-    // Slow path:  Just do a 64-bit read.
-    uint64 result;
-    if (!ReadVarint64(&result)) return false;
-    *value = (uint32)result;
-    return true;
+  // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags
+  // again, since we have now refreshed the buffer.
+  uint64 result;
+  if (!ReadVarint64(&result)) return 0;
+  return static_cast<uint32>(result);
+}
+
+uint32 CodedInputStream::ReadTagFallback() {
+  if (BufferSize() >= kMaxVarintBytes ||
+      // Optimization:  If the varint ends at exactly the end of the buffer,
+      // we can detect that and still use the fast path.
+      (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+    uint32 tag;
+    const uint8* end = ReadVarint32FromArray(buffer_, &tag);
+    if (end == NULL) {
+      return 0;
+    }
+    buffer_ = end;
+    return tag;
+  } else {
+    // We are commonly at a limit when attempting to read tags. Try to quickly
+    // detect this case without making another function call.
+    if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 &&
+        // Make sure that the limit we hit is not total_bytes_limit_, since
+        // in that case we still need to call Refresh() so that it prints an
+        // error.
+        total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
+      // We hit a byte limit.
+      legitimate_message_end_ = true;
+      return 0;
+    }
+    return ReadTagSlow();
   }
 }
 
-bool CodedInputStream::ReadVarint64(uint64* value) {
-  if (buffer_size_ >= kMaxVarintBytes ||
+bool CodedInputStream::ReadVarint64Slow(uint64* value) {
+  // Slow path:  This read might cross the end of the buffer, so we
+  // need to check and refresh the buffer if and when it does.
+
+  uint64 result = 0;
+  int count = 0;
+  uint32 b;
+
+  do {
+    if (count == kMaxVarintBytes) return false;
+    while (buffer_ == buffer_end_) {
+      if (!Refresh()) return false;
+    }
+    b = *buffer_;
+    result |= static_cast<uint64>(b & 0x7F) << (7 * count);
+    Advance(1);
+    ++count;
+  } while (b & 0x80);
+
+  *value = result;
+  return true;
+}
+
+bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
+  if (BufferSize() >= kMaxVarintBytes ||
       // Optimization:  If the varint ends at exactly the end of the buffer,
       // we can detect that and still use the fast path.
-      (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
+      (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
     // Fast path:  We have enough bytes left in the buffer to guarantee that
     // this read won't cross the end, so we can skip the checks.
 
@@ -442,7 +443,7 @@ bool CodedInputStream::ReadVarint64(uint64* value) {
 
     // We have overrun the maximum size of a varint (10 bytes).  The data
     // must be corrupt.
-    return false;
+    return false;
 
    done:
     Advance(ptr - buffer_);
@@ -450,33 +451,13 @@ bool CodedInputStream::ReadVarint64(uint64* value) {
              (static_cast<uint64>(part1) << 28) |
              (static_cast<uint64>(part2) << 56);
     return true;
-
   } else {
-    // Slow path:  This read might cross the end of the buffer, so we
-    // need to check and refresh the buffer if and when it does.
-
-    uint64 result = 0;
-    int count = 0;
-    uint32 b;
-
-    do {
-      if (count == kMaxVarintBytes) return false;
-      while (buffer_size_ == 0) {
-        if (!Refresh()) return false;
-      }
-      b = *buffer_;
-      result |= static_cast<uint64>(b & 0x7F) << (7 * count);
-      Advance(1);
-      ++count;
-    } while(b & 0x80);
-
-    *value = result;
-    return true;
+    return ReadVarint64Slow(value);
   }
 }
 
 bool CodedInputStream::Refresh() {
-  GOOGLE_DCHECK_EQ(buffer_size_, 0);
+  GOOGLE_DCHECK_EQ(0, BufferSize());
 
   if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 ||
       total_bytes_read_ == current_limit_) {
@@ -507,25 +488,27 @@ bool CodedInputStream::Refresh() {
   }
 
   const void* void_buffer;
-  if (input_->Next(&void_buffer, &buffer_size_)) {
+  int buffer_size;
+  if (input_->Next(&void_buffer, &buffer_size)) {
     buffer_ = reinterpret_cast<const uint8*>(void_buffer);
-    GOOGLE_CHECK_GE(buffer_size_, 0);
+    buffer_end_ = buffer_ + buffer_size;
+    GOOGLE_CHECK_GE(buffer_size, 0);
 
-    if (total_bytes_read_ <= INT_MAX - buffer_size_) {
-      total_bytes_read_ += buffer_size_;
+    if (total_bytes_read_ <= INT_MAX - buffer_size) {
+      total_bytes_read_ += buffer_size;
     } else {
-      // Overflow.  Reset buffer_size_ to not include the bytes beyond INT_MAX.
+      // Overflow.  Reset buffer_end_ to not include the bytes beyond INT_MAX.
       // We can't get that far anyway, because total_bytes_limit_ is guaranteed
       // to be less than it.  We need to keep track of the number of bytes
       // we discarded, though, so that we can call input_->BackUp() to back
       // up over them on destruction.
 
       // The following line is equivalent to:
-      //   overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX;
+      //   overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX;
       // except that it avoids overflows.  Signed integer overflow has
       // undefined results according to the C standard.
-      overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_);
-      buffer_size_ -= overflow_bytes_;
+      overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size);
+      buffer_end_ -= overflow_bytes_;
       total_bytes_read_ = INT_MAX;
     }
 
@@ -533,7 +516,7 @@ bool CodedInputStream::Refresh() {
     return true;
   } else {
     buffer_ = NULL;
-    buffer_size_ = 0;
+    buffer_end_ = NULL;
     return false;
   }
 }
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index fa023f35..dcbb0d45 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -114,10 +114,15 @@
 #include <sys/param.h>
 #endif  // !_MSC_VER
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/common.h>          // for GOOGLE_PREDICT_TRUE macro
 
 namespace google {
 
 namespace protobuf {
+
+class DescriptorPool;
+class MessageFactory;
+
 namespace io {
 
 // Defined in this file.
@@ -166,6 +171,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // types of data not covered by the CodedInputStream interface.
   bool GetDirectBufferPointer(const void** data, int* size);
 
+  // Like GetDirectBufferPointer, but this method is inlined, and does not
+  // attempt to Refresh() if the buffer is currently empty.
+  inline void GetDirectBufferPointerInline(const void** data,
+                                           int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
   // Read raw bytes, copying them into the given buffer.
   bool ReadRaw(void* buffer, int size);
 
@@ -177,6 +187,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // could claim that a string is going to be MAX_INT bytes long in order to
   // crash the server because it can't allocate this much space at once.
   bool ReadString(string* buffer, int size);
+  // Like the above, with inlined optimizations. This should only be used
+  // by the protobuf implementation.
+  inline bool InternalReadStringInline(string* buffer,
+                                       int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
 
 
   // Read a 32-bit little-endian integer.
@@ -184,6 +198,15 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Read a 64-bit little-endian integer.
   bool ReadLittleEndian64(uint64* value);
 
+  // These methods read from an externally provided buffer and return a pointer
+  // just past the bytes consumed. The caller must ensure enough bytes remain.
+  // Read a 32-bit little-endian integer.
+  static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
+                                                   uint32* value);
+  // Read a 64-bit little-endian integer.
+  static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
+                                                   uint64* value);
+
   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
   // it to uint32, but may be more efficient.
@@ -208,6 +231,17 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // when given a constant parameter, but GCC doesn't want to inline by default.
   bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
 
+  // Like above, except this reads from the specified buffer. The caller is
+  // responsible for ensuring that the buffer is large enough to read a varint
+  // of the expected size. For best performance, use a compile-time constant as
+  // the expected tag parameter.
+  //
+  // Returns a pointer beyond the expected tag if it was found, or NULL if it
+  // was not.
+  static const uint8* ExpectTagFromArray(
+      const uint8* buffer,
+      uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
   // Usually returns true if no more bytes can be read.  Always returns false
   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
   // call to LastTagWas() will act as if ReadTag() had been called and returned
@@ -318,12 +352,90 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Decrements the recursion depth.
   void DecrementRecursionDepth();
 
+  // Extension Registry ----------------------------------------------
+  // ADVANCED USAGE:  99.9% of people can ignore this section.
+  //
+  // By default, when parsing extensions, the parser looks for extension
+  // definitions in the pool which owns the outer message's Descriptor.
+  // However, you may call SetExtensionRegistry() to provide an alternative
+  // pool instead.  This makes it possible, for example, to parse a message
+  // using a generated class, but represent some extensions using
+  // DynamicMessage.
+
+  // Set the pool used to look up extensions.  Most users do not need to call
+  // this as the correct pool will be chosen automatically.
+  //
+  // WARNING:  It is very easy to misuse this.  Carefully read the requirements
+  //   below.  Do not use this unless you are sure you need it.  Almost no one
+  //   does.
+  //
+  // Let's say you are parsing a message into message object m, and you want
+  // to take advantage of SetExtensionRegistry().  You must follow these
+  // requirements:
+  //
+  // The given DescriptorPool must contain m->GetDescriptor().  It is not
+  // sufficient for it to simply contain a descriptor that has the same name
+  // and content -- it must be the *exact object*.  In other words:
+  //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
+  //          m->GetDescriptor());
+  // There are two ways to satisfy this requirement:
+  // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
+  //    because this is the pool that would be used anyway if you didn't call
+  //    SetExtensionRegistry() at all.
+  // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
+  //    "underlay".  Read the documentation for DescriptorPool for more
+  //    information about underlays.
+  //
+  // You must also provide a MessageFactory.  This factory will be used to
+  // construct Message objects representing extensions.  The factory's
+  // GetPrototype() MUST return non-NULL for any Descriptor which can be found
+  // through the provided pool.
+  //
+  // If the provided factory might return instances of protocol-compiler-
+  // generated (i.e. compiled-in) types, or if the outer message object m is
+  // a generated type, then the given factory MUST have this property:  If
+  // GetPrototype() is given a Descriptor which resides in
+  // DescriptorPool::generated_pool(), the factory MUST return the same
+  // prototype which MessageFactory::generated_factory() would return.  That
+  // is, given a descriptor for a generated type, the factory must return an
+  // instance of the generated class (NOT DynamicMessage).  However, when
+  // given a descriptor for a type that is NOT in generated_pool, the factory
+  // is free to return any implementation.
+  //
+  // The reason for this requirement is that generated sub-objects may be
+  // accessed via the standard (non-reflection) extension accessor methods,
+  // and these methods will down-cast the object to the generated class type.
+  // If the object is not actually of that type, the results would be undefined.
+  // On the other hand, if an extension is not compiled in, then there is no
+  // way the code could end up accessing it via the standard accessors -- the
+  // only way to access the extension is via reflection.  When using reflection,
+  // DynamicMessage and generated messages are indistinguishable, so it's fine
+  // if these objects are represented using DynamicMessage.
+  //
+  // Using DynamicMessageFactory on which you have called
+  // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
+  // above requirement.
+  //
+  // If either pool or factory is NULL, both must be NULL.
+  //
+  // Note that this feature is ignored when parsing "lite" messages as they do
+  // not have descriptors.
+  void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
+
+  // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
+  // has been provided.
+  const DescriptorPool* GetExtensionPool();
+
+  // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
+  // factory has been provided.
+  MessageFactory* GetExtensionFactory();
+
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
 
   ZeroCopyInputStream* input_;
   const uint8* buffer_;
-  int buffer_size_;       // size of current buffer
+  const uint8* buffer_end_;     // pointer to the end of the buffer.
   int total_bytes_read_;  // total bytes read from input_, including
                           // the current buffer
 
@@ -334,7 +446,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // LastTagWas() stuff.
   uint32 last_tag_;         // result of last ReadTag().
 
-  // This is set true by ReadVarint32Fallback() if it is called when exactly
+  // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
   // reach the end of a message and attempt to read another tag.
   bool legitimate_message_end_;
@@ -365,6 +477,12 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Recursion depth limit, set by SetRecursionLimit().
   int recursion_limit_;
 
+  // See SetExtensionRegistry().
+  const DescriptorPool* extension_pool_;
+  MessageFactory* extension_factory_;
+
+  // Private member functions.
+
   // Advance the buffer by a given number of bytes.
   void Advance(int amount);
 
@@ -379,10 +497,36 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   void PrintTotalBytesLimitError();
 
   // Called when the buffer runs out to request more data.  Implies an
-  // Advance(buffer_size_).
+  // Advance(BufferSize()).
   bool Refresh();
 
+  // When parsing varints, we optimize for the common case of small values, and
+  // then optimize for the case when the varint fits within the current buffer
+  // piece. The Fallback method is used when we can't use the one-byte
+  // optimization. The Slow method is yet another fallback when the buffer is
+  // not large enough. Making the slow path out-of-line speeds up the common
+  // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
+  // message crosses multiple buffers.
   bool ReadVarint32Fallback(uint32* value);
+  bool ReadVarint64Fallback(uint64* value);
+  bool ReadVarint32Slow(uint32* value);
+  bool ReadVarint64Slow(uint64* value);
+  bool ReadLittleEndian32Fallback(uint32* value);
+  bool ReadLittleEndian64Fallback(uint64* value);
+  // Fallback/slow methods for reading tags. These do not update last_tag_,
+  // but will set legitimate_message_end_ if we are at the end of the input
+  // stream.
+  uint32 ReadTagFallback();
+  uint32 ReadTagSlow();
+  bool ReadStringFallback(string* buffer, int size);
+
+  // Return the size of the buffer.
+  int BufferSize() const;
+
+  static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
+
+  static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
+  static const int kDefaultRecursionLimit = 64;
 };
 
 // Class which encodes and writes binary data which is composed of varint-
@@ -568,7 +712,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
 // methods optimize for that case.
 
 inline bool CodedInputStream::ReadVarint32(uint32* value) {
-  if (buffer_size_ != 0 && *buffer_ < 0x80) {
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
     *value = *buffer_;
     Advance(1);
     return true;
@@ -577,20 +721,93 @@ inline bool CodedInputStream::ReadVarint32(uint32* value) {
   }
 }
 
+inline bool CodedInputStream::ReadVarint64(uint64* value) {
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
+    *value = *buffer_;
+    Advance(1);
+    return true;
+  } else {
+    return ReadVarint64Fallback(value);
+  }
+}
+
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
+    const uint8* buffer,
+    uint32* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+  memcpy(value, buffer, sizeof(*value));
+  return buffer + sizeof(*value);
+#else
+  *value = (static_cast<uint32>(buffer[0])      ) |
+           (static_cast<uint32>(buffer[1]) <<  8) |
+           (static_cast<uint32>(buffer[2]) << 16) |
+           (static_cast<uint32>(buffer[3]) << 24);
+  return buffer + sizeof(*value);
+#endif
+}
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
+    const uint8* buffer,
+    uint64* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+  memcpy(value, buffer, sizeof(*value));
+  return buffer + sizeof(*value);
+#else
+  uint32 part0 = (static_cast<uint32>(buffer[0])      ) |
+                 (static_cast<uint32>(buffer[1]) <<  8) |
+                 (static_cast<uint32>(buffer[2]) << 16) |
+                 (static_cast<uint32>(buffer[3]) << 24);
+  uint32 part1 = (static_cast<uint32>(buffer[4])      ) |
+                 (static_cast<uint32>(buffer[5]) <<  8) |
+                 (static_cast<uint32>(buffer[6]) << 16) |
+                 (static_cast<uint32>(buffer[7]) << 24);
+  *value = static_cast<uint64>(part0) |
+          (static_cast<uint64>(part1) << 32);
+  return buffer + sizeof(*value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+    memcpy(value, buffer_, sizeof(*value));
+    Advance(sizeof(*value));
+    return true;
+  } else {
+    return ReadLittleEndian32Fallback(value);
+  }
+#else
+  return ReadLittleEndian32Fallback(value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+    memcpy(value, buffer_, sizeof(*value));
+    Advance(sizeof(*value));
+    return true;
+  } else {
+    return ReadLittleEndian64Fallback(value);
+  }
+#else
+  return ReadLittleEndian64Fallback(value);
+#endif
+}
+
 inline uint32 CodedInputStream::ReadTag() {
-  if (buffer_size_ != 0 && buffer_[0] < 0x80) {
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
     last_tag_ = buffer_[0];
     Advance(1);
     return last_tag_;
-  } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
-    last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
-    Advance(2);
-    return last_tag_;
-  } else if (ReadVarint32Fallback(&last_tag_)) {
-    return last_tag_;
   } else {
-    last_tag_ = 0;
-    return 0;
+    last_tag_ = ReadTagFallback();
+    return last_tag_;
   }
 }
 
@@ -604,14 +821,14 @@ inline bool CodedInputStream::ConsumedEntireMessage() {
 
 inline bool CodedInputStream::ExpectTag(uint32 expected) {
   if (expected < (1 << 7)) {
-    if (buffer_size_ != 0 && buffer_[0] == expected) {
+    if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
       Advance(1);
       return true;
     } else {
       return false;
     }
   } else if (expected < (1 << 14)) {
-    if (buffer_size_ >= 2 &&
+    if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
         buffer_[0] == static_cast<uint8>(expected | 0x80) &&
         buffer_[1] == static_cast<uint8>(expected >> 7)) {
       Advance(2);
@@ -625,11 +842,32 @@ inline bool CodedInputStream::ExpectTag(uint32 expected) {
   }
 }
 
+inline const uint8* CodedInputStream::ExpectTagFromArray(
+    const uint8* buffer, uint32 expected) {
+  if (expected < (1 << 7)) {
+    if (buffer[0] == expected) {
+      return buffer + 1;
+    }
+  } else if (expected < (1 << 14)) {
+    if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
+        buffer[1] == static_cast<uint8>(expected >> 7)) {
+      return buffer + 2;
+    }
+  }
+  return NULL;
+}
+
+inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
+                                                           int* size) {
+  *data = buffer_;
+  *size = buffer_end_ - buffer_;
+}
+
 inline bool CodedInputStream::ExpectAtEnd() {
   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
   // hard to say without calling Refresh(), and we'd rather not do that.
 
-  if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
+  if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
     last_tag_ = 0;                   // Pretend we called ReadTag()...
     legitimate_message_end_ = true;  // ... and it hit EOF.
     return true;
@@ -677,11 +915,11 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
 
 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
   memcpy(target, &value, sizeof(value));
 #else
-  target[0] = static_cast<uint8>(value      );
+  target[0] = static_cast<uint8>(value);
   target[1] = static_cast<uint8>(value >>  8);
   target[2] = static_cast<uint8>(value >> 16);
   target[3] = static_cast<uint8>(value >> 24);
@@ -691,18 +929,18 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
 
 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
   memcpy(target, &value, sizeof(value));
 #else
   uint32 part0 = static_cast<uint32>(value);
   uint32 part1 = static_cast<uint32>(value >> 32);
 
-  target[0] = static_cast<uint8>(part0      );
+  target[0] = static_cast<uint8>(part0);
   target[1] = static_cast<uint8>(part0 >>  8);
   target[2] = static_cast<uint8>(part0 >> 16);
   target[3] = static_cast<uint8>(part0 >> 24);
-  target[4] = static_cast<uint8>(part1      );
+  target[4] = static_cast<uint8>(part1);
   target[5] = static_cast<uint8>(part1 >>  8);
   target[6] = static_cast<uint8>(part1 >> 16);
   target[7] = static_cast<uint8>(part1 >> 24);
@@ -759,7 +997,6 @@ inline int CodedOutputStream::ByteCount() const {
 
 inline void CodedInputStream::Advance(int amount) {
   buffer_ += amount;
-  buffer_size_ -= amount;
 }
 
 inline void CodedOutputStream::Advance(int amount) {
@@ -780,6 +1017,72 @@ inline void CodedInputStream::DecrementRecursionDepth() {
   if (recursion_depth_ > 0) --recursion_depth_;
 }
 
+inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
+                                                   MessageFactory* factory) {
+  extension_pool_ = pool;
+  extension_factory_ = factory;
+}
+
+inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
+  return extension_pool_;
+}
+
+inline MessageFactory* CodedInputStream::GetExtensionFactory() {
+  return extension_factory_;
+}
+
+inline int CodedInputStream::BufferSize() const {
+  return buffer_end_ - buffer_;
+}
+
+inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
+  : input_(input),
+    buffer_(NULL),
+    buffer_end_(NULL),
+    total_bytes_read_(0),
+    overflow_bytes_(0),
+    last_tag_(0),
+    legitimate_message_end_(false),
+    aliasing_enabled_(false),
+    current_limit_(INT_MAX),
+    buffer_size_after_limit_(0),
+    total_bytes_limit_(kDefaultTotalBytesLimit),
+    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+    recursion_depth_(0),
+    recursion_limit_(kDefaultRecursionLimit),
+    extension_pool_(NULL),
+    extension_factory_(NULL) {
+  // Eagerly Refresh() so buffer space is immediately available.
+  Refresh();
+}
+
+inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
+  : input_(NULL),
+    buffer_(buffer),
+    buffer_end_(buffer + size),
+    total_bytes_read_(size),
+    overflow_bytes_(0),
+    last_tag_(0),
+    legitimate_message_end_(false),
+    aliasing_enabled_(false),
+    current_limit_(size),
+    buffer_size_after_limit_(0),
+    total_bytes_limit_(kDefaultTotalBytesLimit),
+    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+    recursion_depth_(0),
+    recursion_limit_(kDefaultRecursionLimit),
+    extension_pool_(NULL),
+    extension_factory_(NULL) {
+  // Note that setting current_limit_ == size is important to prevent some
+  // code paths from trying to access input_ and segfaulting.
+}
+
+inline CodedInputStream::~CodedInputStream() {
+  if (input_ != NULL) {
+    BackUpInputToCurrentPosition();
+  }
+}
+
 }  // namespace io
 }  // namespace protobuf
 
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index e165fb93..7d298332 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -242,6 +242,24 @@ TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
   }
 }
 
+TEST_1D(CodedStreamTest, ExpectTagFromArray, kVarintCases) {
+  memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+
+  const uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+
+  // If the expectation succeeds, it should return a pointer past the tag.
+  if (kVarintCases_case.size <= 2) {
+    EXPECT_TRUE(NULL ==
+                CodedInputStream::ExpectTagFromArray(buffer_,
+                                                     expected_value + 1));
+    EXPECT_TRUE(buffer_ + kVarintCases_case.size ==
+                CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+  } else {
+    EXPECT_TRUE(NULL ==
+                CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+  }
+}
+
 TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) {
   memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
   ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
@@ -529,10 +547,32 @@ TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) {
   EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64)));
 }
 
+// Tests using the static methods to read fixed-size values from raw arrays.
+
+TEST_1D(CodedStreamTest, ReadLittleEndian32FromArray, kFixed32Cases) {
+  memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes));
+
+  uint32 value;
+  const uint8* end = CodedInputStream::ReadLittleEndian32FromArray(
+      buffer_, &value);
+  EXPECT_EQ(kFixed32Cases_case.value, value);
+  EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
+TEST_1D(CodedStreamTest, ReadLittleEndian64FromArray, kFixed64Cases) {
+  memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes));
+
+  uint64 value;
+  const uint8* end = CodedInputStream::ReadLittleEndian64FromArray(
+      buffer_, &value);
+  EXPECT_EQ(kFixed64Cases_case.value, value);
+  EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
 // -------------------------------------------------------------------
 // Raw reads and writes
 
-const char kRawBytes[] = "Some bytes which will be writted and read raw.";
+const char kRawBytes[] = "Some bytes which will be written and read raw.";
 
 TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) {
   memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
@@ -593,6 +633,22 @@ TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) {
   }
 }
 
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) {
+  // Same test as above, except directly use a buffer. This used to cause
+  // crashes while the above did not.
+  uint8 buffer[8];
+  CodedInputStream coded_input(buffer, 8);
+  string str;
+  EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
+  scoped_array<uint8> buffer(new uint8[8]);
+  CodedInputStream coded_input(buffer.get(), 8);
+  string str;
+  EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
 
 // -------------------------------------------------------------------
 // Skip
@@ -652,6 +708,36 @@ TEST_F(CodedStreamTest, GetDirectBufferPointerInput) {
   EXPECT_EQ(8, size);
 }
 
+TEST_F(CodedStreamTest, GetDirectBufferPointerInlineInput) {
+  ArrayInputStream input(buffer_, sizeof(buffer_), 8);
+  CodedInputStream coded_input(&input);
+
+  const void* ptr;
+  int size;
+
+  coded_input.GetDirectBufferPointerInline(&ptr, &size);
+  EXPECT_EQ(buffer_, ptr);
+  EXPECT_EQ(8, size);
+
+  // Peeking again should return the same pointer.
+  coded_input.GetDirectBufferPointerInline(&ptr, &size);
+  EXPECT_EQ(buffer_, ptr);
+  EXPECT_EQ(8, size);
+
+  // Skip forward in the same buffer then peek again.
+  EXPECT_TRUE(coded_input.Skip(3));
+  coded_input.GetDirectBufferPointerInline(&ptr, &size);
+  EXPECT_EQ(buffer_ + 3, ptr);
+  EXPECT_EQ(5, size);
+
+  // Skip to end of buffer and peek -- should provide an empty buffer
+  // pointing at the end of the data. It does not try to Refresh().
+  EXPECT_TRUE(coded_input.Skip(5));
+  coded_input.GetDirectBufferPointerInline(&ptr, &size);
+  EXPECT_EQ(buffer_ + 8, ptr);
+  EXPECT_EQ(0, size);
+}
+
 TEST_F(CodedStreamTest, GetDirectBufferPointerOutput) {
   ArrayOutputStream output(buffer_, sizeof(buffer_), 8);
   CodedOutputStream coded_output(&output);
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
index 84d277f4..e1a35ea2 100644
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -315,6 +315,6 @@ bool GzipOutputStream::Close() {
 
 }  // namespace io
 }  // namespace protobuf
-}  // namespace google
 
 #endif  // HAVE_ZLIB
+}  // namespace google
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
index 937d777e..c7d3074d 100644
--- a/src/google/protobuf/io/printer.cc
+++ b/src/google/protobuf/io/printer.cc
@@ -65,10 +65,10 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
     if (text[i] == '\n') {
       // Saw newline.  If there is more text, we may need to insert an indent
       // here.  So, write what we have so far, including the '\n'.
-      Write(text + pos, i - pos + 1);
+      WriteRaw(text + pos, i - pos + 1);
       pos = i + 1;
 
-      // Setting this true will cause the next Write() to insert an indent
+      // Setting this true will cause the next WriteRaw() to insert an indent
       // first.
       at_start_of_line_ = true;
 
@@ -76,7 +76,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
       // Saw the start of a variable name.
 
       // Write what we have so far.
-      Write(text + pos, i - pos);
+      WriteRaw(text + pos, i - pos);
       pos = i + 1;
 
       // Find closing delimiter.
@@ -90,14 +90,14 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
       string varname(text + pos, endpos - pos);
       if (varname.empty()) {
         // Two delimiters in a row reduce to a literal delimiter character.
-        Write(&variable_delimiter_, 1);
+        WriteRaw(&variable_delimiter_, 1);
       } else {
         // Replace with the variable's value.
         map<string, string>::const_iterator iter = variables.find(varname);
         if (iter == variables.end()) {
           GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname;
         } else {
-          Write(iter->second.data(), iter->second.size());
+          WriteRaw(iter->second.data(), iter->second.size());
         }
       }
 
@@ -108,7 +108,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
   }
 
   // Write the rest.
-  Write(text + pos, size - pos);
+  WriteRaw(text + pos, size - pos);
 }
 
 void Printer::Print(const char* text) {
@@ -145,14 +145,23 @@ void Printer::Outdent() {
   indent_.resize(indent_.size() - 2);
 }
 
-void Printer::Write(const char* data, int size) {
+void Printer::PrintRaw(const string& data) {
+  WriteRaw(data.data(), data.size());
+}
+
+void Printer::PrintRaw(const char* data) {
+  if (failed_) return;
+  WriteRaw(data, strlen(data));
+}
+
+void Printer::WriteRaw(const char* data, int size) {
   if (failed_) return;
   if (size == 0) return;
 
   if (at_start_of_line_) {
     // Insert an indent.
     at_start_of_line_ = false;
-    Write(indent_.data(), indent_.size());
+    WriteRaw(indent_.data(), indent_.size());
     if (failed_) return;
   }
 
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
index b7c4cf39..de085389 100644
--- a/src/google/protobuf/io/printer.h
+++ b/src/google/protobuf/io/printer.h
@@ -59,8 +59,8 @@ class ZeroCopyOutputStream;     // zero_copy_stream.h
 // The above writes "My name is Bob." to the output stream.
 //
 // Printer aggressively enforces correct usage, crashing (with assert failures)
-// in the case of undefined variables.  This helps greatly in debugging code
-// which uses it.  This class is not intended to be used by production servers.
+// in the case of undefined variables in debug builds. This helps greatly in
+// debugging code which uses it.
 class LIBPROTOBUF_EXPORT Printer {
  public:
   // Create a printer that writes text to the given output stream.  Use the
@@ -94,15 +94,24 @@ class LIBPROTOBUF_EXPORT Printer {
   // level is zero.
   void Outdent();
 
+  // Write a string to the output buffer.
+  // This method does not look for newlines to add indentation.
+  void PrintRaw(const string& data);
+
+  // Write a NUL-terminated string to the output buffer.
+  // This method does not look for newlines to add indentation.
+  void PrintRaw(const char* data);
+
+  // Write some bytes to the output buffer.
+  // This method does not look for newlines to add indentation.
+  void WriteRaw(const char* data, int size);
+
   // True if any write to the underlying stream failed.  (We don't just
   // crash in this case because this is an I/O failure, not a programming
   // error.)
   bool failed() const { return failed_; }
 
  private:
-  // Write some text to the output buffer.
-  void Write(const char* data, int size);
-
   const char variable_delimiter_;
 
   ZeroCopyOutputStream* const output_;
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index 69c7ee34..580a53da 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -76,10 +76,38 @@ TEST(Printer, BasicPrinting) {
 
     buffer[output.ByteCount()] = '\0';
 
-    EXPECT_STREQ(buffer,
-      "Hello World!  This is the same line.\n"
-      "But this is a new one.\n"
-      "And this is another one.");
+    EXPECT_STREQ("Hello World!  This is the same line.\n"
+                 "But this is a new one.\n"
+                 "And this is another one.",
+                 buffer);
+  }
+}
+
+TEST(Printer, WriteRaw) {
+  char buffer[8192];
+
+  for (int block_size = 1; block_size < 512; block_size *= 2) {
+    ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+    {
+      string string_obj = "From an object\n";
+      Printer printer(&output, '$');
+      printer.WriteRaw("Hello World!", 12);
+      printer.PrintRaw("  This is the same line.\n");
+      printer.PrintRaw("But this is a new one.\nAnd this is another one.");
+      printer.WriteRaw("\n", 1);
+      printer.PrintRaw(string_obj);
+      EXPECT_FALSE(printer.failed());
+    }
+
+    buffer[output.ByteCount()] = '\0';
+
+    EXPECT_STREQ("Hello World!  This is the same line.\n"
+                 "But this is a new one.\n"
+                 "And this is another one."
+                 "\n"
+                 "From an object\n",
+                 buffer);
   }
 }
 
@@ -98,6 +126,7 @@ TEST(Printer, VariableSubstitution) {
       vars["abcdefg"] = "1234";
 
       printer.Print(vars, "Hello $foo$!\nbar = $bar$\n");
+      printer.PrintRaw("RawBit\n");
       printer.Print(vars, "$abcdefg$\nA literal dollar sign:  $$");
 
       vars["foo"] = "blah";
@@ -108,12 +137,13 @@ TEST(Printer, VariableSubstitution) {
 
     buffer[output.ByteCount()] = '\0';
 
-    EXPECT_STREQ(buffer,
-      "Hello World!\n"
-      "bar = $foo$\n"
-      "1234\n"
-      "A literal dollar sign:  $\n"
-      "Now foo = blah.");
+    EXPECT_STREQ("Hello World!\n"
+                 "bar = $foo$\n"
+                 "RawBit\n"
+                 "1234\n"
+                 "A literal dollar sign:  $\n"
+                 "Now foo = blah.",
+                 buffer);
   }
 }
 
@@ -125,15 +155,17 @@ TEST(Printer, InlineVariableSubstitution) {
   {
     Printer printer(&output, '$');
     printer.Print("Hello $foo$!\n", "foo", "World");
+    printer.PrintRaw("RawBit\n");
     printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two");
     EXPECT_FALSE(printer.failed());
   }
 
   buffer[output.ByteCount()] = '\0';
 
-  EXPECT_STREQ(buffer,
-    "Hello World!\n"
-    "one two\n");
+  EXPECT_STREQ("Hello World!\n"
+               "RawBit\n"
+               "one two\n",
+               buffer);
 }
 
 TEST(Printer, Indenting) {
@@ -156,6 +188,8 @@ TEST(Printer, Indenting) {
       printer.Indent();
       printer.Print("  And this is still the same line.\n"
                     "But this is indented.\n");
+      printer.PrintRaw("RawBit has indent at start\n");
+      printer.PrintRaw("but not after a raw newline\n");
       printer.Print(vars, "Note that a newline in a variable will break "
                     "indenting, as we see$newline$here.\n");
       printer.Indent();
@@ -169,16 +203,19 @@ TEST(Printer, Indenting) {
 
     buffer[output.ByteCount()] = '\0';
 
-    EXPECT_STREQ(buffer,
+    EXPECT_STREQ(
       "This is not indented.\n"
       "  This is indented\n"
       "  And so is this\n"
       "But this is not.  And this is still the same line.\n"
       "  But this is indented.\n"
-      "  Note that a newline in a variable will break indenting, as we see\n"
+      "  RawBit has indent at start\n"
+      "but not after a raw newline\n"
+      "Note that a newline in a variable will break indenting, as we see\n"
       "here.\n"
       "    And this is double-indented\n"
-      "Back to normal.");
+      "Back to normal.",
+      buffer);
   }
 }
 
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index 0bda451b..75cbfed5 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -119,7 +119,7 @@ namespace {
 CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
                             c == '\r' || c == '\v');
 
-CHARACTER_CLASS(Unprintable, c < ' ' && c != '\0');
+CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
 
 CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
 CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 98386e0b..d115161f 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -63,6 +63,11 @@ class LIBPROTOBUF_EXPORT ErrorCollector {
   // 1 to each before printing them.
   virtual void AddError(int line, int column, const string& message) = 0;
 
+  // Indicates that there was a warning in the input at the given line and
+  // column numbers.  The numbers are zero-based, so you may want to add
+  // 1 to each before printing them.
+  virtual void AddWarning(int line, int column, const string& message) { }
+
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
 };
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index eac1455d..3598e188 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -397,6 +397,12 @@ MultiTokenCase kMultiTokenCases[] = {
     { Tokenizer::TYPE_IDENTIFIER, "baz", 1,  0 },
     { Tokenizer::TYPE_END       , ""   , 1, 3 },
   }},
+
+  // Bytes with the high-order bit set should not be seen as control characters.
+  { "\300", {
+    { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 },
+    { Tokenizer::TYPE_END   , ""    , 0, 1 },
+  }},
 };
 
 TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 64e96cd4..9fedb005 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -133,10 +133,11 @@ class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream {
 
 // A ZeroCopyOutputStream which writes to a file descriptor.
 //
-// FileInputStream is preferred over using an ofstream with OstreamOutputStream.
-// The latter will introduce an extra layer of buffering, harming performance.
-// Also, it's conceivable that FileInputStream could someday be enhanced
-// to use zero-copy file descriptors on OSs which support them.
+// FileOutputStream is preferred over using an ofstream with
+// OstreamOutputStream.  The latter will introduce an extra layer of buffering,
+// harming performance.  Also, it's conceivable that FileOutputStream could
+// someday be enhanced to use zero-copy file descriptors on OSs which
+// support them.
 class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream {
  public:
   // Creates a stream that writes to the given Unix file descriptor.
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index f919b7ac..8229ee6d 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -403,7 +403,8 @@ TEST_F(IoTest, CompressionOptions) {
 
   string golden;
   File::ReadFileToStringOrDie(
-      TestSourceDir() + "/google/protobuf/testdata/golden_message", &golden);
+    TestSourceDir() + "/google/protobuf/testdata/golden_message",
+    &golden);
 
   GzipOutputStream::Options options;
   string gzip_compressed = Compress(golden, options);
author	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2009-12-18 02:11:36 +0000
committer	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2009-12-18 02:11:36 +0000
commit	fccb146e3fe437b0df1e9c50d4b8e1080ddb4bd9 (patch)
tree	9f2d9fe0267d96a54e541377ffeada3d0bff0d1d /src/google/protobuf/io
parent	d5cf7b55a6a1f959d1646785f63ca2b62da78079 (diff)