about | summary | refs | log | tree | commit | diff | homepage
path: root/src/google/protobuf/io
diff options
context:
space:
mode:
author: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-12-18 02:11:36 +0000
committer: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-12-18 02:11:36 +0000
commit: fccb146e3fe437b0df1e9c50d4b8e1080ddb4bd9 (patch)
tree: 9f2d9fe0267d96a54e541377ffeada3d0bff0d1d /src/google/protobuf/io
parent: d5cf7b55a6a1f959d1646785f63ca2b62da78079 (diff)
Massive roll-up of changes. See CHANGES.txt.
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r-- src/google/protobuf/io/coded_stream.cc 379
-rw-r--r-- src/google/protobuf/io/coded_stream.h 347
-rw-r--r-- src/google/protobuf/io/coded_stream_unittest.cc 88
-rw-r--r-- src/google/protobuf/io/gzip_stream.cc 2
-rw-r--r-- src/google/protobuf/io/printer.cc 25
-rw-r--r-- src/google/protobuf/io/printer.h 19
-rw-r--r-- src/google/protobuf/io/printer_unittest.cc 69
-rw-r--r-- src/google/protobuf/io/tokenizer.cc 2
-rw-r--r-- src/google/protobuf/io/tokenizer.h 5
-rw-r--r-- src/google/protobuf/io/tokenizer_unittest.cc 6
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_impl.h 9
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_unittest.cc 3
12 files changed, 697 insertions, 257 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index e17a4775..6a91a13d 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -38,9 +38,9 @@
// will not cross the end of the buffer, since we can avoid a lot
// of branching in this case.
-#include <stack>
+#include <google/protobuf/io/coded_stream_inl.h>
+#include <algorithm>
#include <limits.h>
-#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/stl_util-inl.h>
@@ -52,11 +52,6 @@ namespace io {
namespace {
-static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
-
-static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
-static const int kDefaultRecursionLimit = 64;
-
static const int kMaxVarintBytes = 10;
static const int kMaxVarint32Bytes = 5;
@@ -65,72 +60,28 @@ static const int kMaxVarint32Bytes = 5;
// CodedInputStream ==================================================
-CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
- : input_(input),
- buffer_(NULL),
- buffer_size_(0),
- total_bytes_read_(0),
- overflow_bytes_(0),
- last_tag_(0),
- legitimate_message_end_(false),
- aliasing_enabled_(false),
- current_limit_(INT_MAX),
- buffer_size_after_limit_(0),
- total_bytes_limit_(kDefaultTotalBytesLimit),
- total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
- recursion_depth_(0),
- recursion_limit_(kDefaultRecursionLimit) {
- // Eagerly Refresh() so buffer space is immediately available.
- Refresh();
-}
-
-CodedInputStream::CodedInputStream(const uint8* buffer, int size)
- : input_(NULL),
- buffer_(buffer),
- buffer_size_(size),
- total_bytes_read_(size),
- overflow_bytes_(0),
- last_tag_(0),
- legitimate_message_end_(false),
- aliasing_enabled_(false),
- current_limit_(size),
- buffer_size_after_limit_(0),
- total_bytes_limit_(kDefaultTotalBytesLimit),
- total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
- recursion_depth_(0),
- recursion_limit_(kDefaultRecursionLimit) {
- // Note that setting current_limit_ == size is important to prevent some
- // code paths from trying to access input_ and segfaulting.
-}
-
-CodedInputStream::~CodedInputStream() {
- if (input_ != NULL) {
- BackUpInputToCurrentPosition();
- }
-}
-
void CodedInputStream::BackUpInputToCurrentPosition() {
- int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_;
+ int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;  // unread buffer + bytes past the limit + bytes dropped on INT_MAX overflow
if (backup_bytes > 0) {
input_->BackUp(backup_bytes);
// total_bytes_read_ doesn't include overflow_bytes_.
- total_bytes_read_ -= buffer_size_ + buffer_size_after_limit_;
- buffer_size_ = 0;
+ total_bytes_read_ -= BufferSize() + buffer_size_after_limit_;  // un-count the bytes just handed back to input_
+ buffer_end_ = buffer_;  // buffer is now empty (pointer form of the old buffer_size_ = 0)
buffer_size_after_limit_ = 0;
overflow_bytes_ = 0;
}
}
inline void CodedInputStream::RecomputeBufferLimits() {
- buffer_size_ += buffer_size_after_limit_;
+ buffer_end_ += buffer_size_after_limit_;
int closest_limit = min(current_limit_, total_bytes_limit_);
if (closest_limit < total_bytes_read_) {
// The limit position is in the current buffer. We must adjust
// the buffer size accordingly.
buffer_size_after_limit_ = total_bytes_read_ - closest_limit;
- buffer_size_ -= buffer_size_after_limit_;
+ buffer_end_ -= buffer_size_after_limit_;
} else {
buffer_size_after_limit_ = 0;
}
@@ -139,7 +90,7 @@ inline void CodedInputStream::RecomputeBufferLimits() {
CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
// Current position relative to the beginning of the stream.
int current_position = total_bytes_read_ -
- (buffer_size_ + buffer_size_after_limit_);
+ (BufferSize() + buffer_size_after_limit_);
Limit old_limit = current_limit_;
@@ -176,7 +127,7 @@ void CodedInputStream::PopLimit(Limit limit) {
int CodedInputStream::BytesUntilLimit() {
if (current_limit_ == INT_MAX) return -1;
int current_position = total_bytes_read_ -
- (buffer_size_ + buffer_size_after_limit_);
+ (BufferSize() + buffer_size_after_limit_);  // total_bytes_read_ counts the whole current buffer, so subtract what is still unread
return current_limit_ - current_position;
}
@@ -186,7 +137,7 @@ void CodedInputStream::SetTotalBytesLimit(
// Make sure the limit isn't already past, since this could confuse other
// code.
int current_position = total_bytes_read_ -
- (buffer_size_ + buffer_size_after_limit_);
+ (BufferSize() + buffer_size_after_limit_);
total_bytes_limit_ = max(current_position, total_bytes_limit);
total_bytes_warning_threshold_ = warning_threshold;
RecomputeBufferLimits();
@@ -203,7 +154,9 @@ void CodedInputStream::PrintTotalBytesLimitError() {
bool CodedInputStream::Skip(int count) {
if (count < 0) return false; // security: count is often user-supplied
- if (count <= buffer_size_) {
+ const int original_buffer_size = BufferSize();
+
+ if (count <= original_buffer_size) {
// Just skipping within the current buffer. Easy.
Advance(count);
return true;
@@ -211,13 +164,13 @@ bool CodedInputStream::Skip(int count) {
if (buffer_size_after_limit_ > 0) {
// We hit a limit inside this buffer. Advance to the limit and fail.
- Advance(buffer_size_);
+ Advance(original_buffer_size);
return false;
}
- count -= buffer_size_;
+ count -= original_buffer_size;
buffer_ = NULL;
- buffer_size_ = 0;
+ buffer_end_ = buffer_;
// Make sure this skip doesn't try to skip past the current limit.
int closest_limit = min(current_limit_, total_bytes_limit_);
@@ -236,20 +189,21 @@ bool CodedInputStream::Skip(int count) {
}
bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) {
- if (buffer_size_ == 0 && !Refresh()) return false;
+ if (BufferSize() == 0 && !Refresh()) return false;  // empty buffer: try to fetch more, fail if Refresh() does
*data = buffer_;
- *size = buffer_size_;
+ *size = BufferSize();  // number of bytes currently available starting at *data
return true;
}
bool CodedInputStream::ReadRaw(void* buffer, int size) {
- while (buffer_size_ < size) {
+ int current_buffer_size;
+ while ((current_buffer_size = BufferSize()) < size) {
// Reading past end of buffer. Copy what we have, then refresh.
- memcpy(buffer, buffer_, buffer_size_);
- buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_;
- size -= buffer_size_;
- Advance(buffer_size_);
+ memcpy(buffer, buffer_, current_buffer_size);
+ buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size;
+ size -= current_buffer_size;
+ Advance(current_buffer_size);
if (!Refresh()) return false;
}
@@ -261,27 +215,25 @@ bool CodedInputStream::ReadRaw(void* buffer, int size) {
bool CodedInputStream::ReadString(string* buffer, int size) {
if (size < 0) return false; // security: size is often user-supplied
+ return InternalReadStringInline(buffer, size);  // the actual read is done by the inline helper declared in coded_stream.h
+}
+bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
if (!buffer->empty()) {
buffer->clear();
}
- if (size < buffer_size_) {
- STLStringResizeUninitialized(buffer, size);
- memcpy((uint8*)buffer->data(), buffer_, size);
- Advance(size);
- return true;
- }
-
- while (buffer_size_ < size) {
+ int current_buffer_size;
+ while ((current_buffer_size = BufferSize()) < size) {
// Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
- if (buffer_size_ != 0) {
+ if (current_buffer_size != 0) {
// Note: string1.append(string2) is O(string2.size()) (as opposed to
// O(string1.size() + string2.size()), which would be bad).
- buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_);
+ buffer->append(reinterpret_cast<const char*>(buffer_),
+ current_buffer_size);
}
- size -= buffer_size_;
- Advance(buffer_size_);
+ size -= current_buffer_size;
+ Advance(current_buffer_size);
if (!Refresh()) return false;
}
@@ -292,11 +244,11 @@ bool CodedInputStream::ReadString(string* buffer, int size) {
}
-bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+bool CodedInputStream::ReadLittleEndian32Fallback(uint32* value) {
uint8 bytes[sizeof(*value)];
const uint8* ptr;
- if (buffer_size_ >= sizeof(*value)) {
+ if (BufferSize() >= sizeof(*value)) {
// Fast path: Enough bytes in the buffer to read directly.
ptr = buffer_;
Advance(sizeof(*value));
@@ -305,19 +257,15 @@ bool CodedInputStream::ReadLittleEndian32(uint32* value) {
if (!ReadRaw(bytes, sizeof(*value))) return false;
ptr = bytes;
}
-
- *value = (static_cast<uint32>(ptr[0]) ) |
- (static_cast<uint32>(ptr[1]) << 8) |
- (static_cast<uint32>(ptr[2]) << 16) |
- (static_cast<uint32>(ptr[3]) << 24);
+ ReadLittleEndian32FromArray(ptr, value);
return true;
}
-bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) {
uint8 bytes[sizeof(*value)];
const uint8* ptr;
- if (buffer_size_ >= sizeof(*value)) {
+ if (BufferSize() >= sizeof(*value)) {
// Fast path: Enough bytes in the buffer to read directly.
ptr = buffer_;
Advance(sizeof(*value));
@@ -326,99 +274,152 @@ bool CodedInputStream::ReadLittleEndian64(uint64* value) {
if (!ReadRaw(bytes, sizeof(*value))) return false;
ptr = bytes;
}
+ ReadLittleEndian64FromArray(ptr, value);
+ return true;
+}
+
+namespace {
+
+inline const uint8* ReadVarint32FromArray(
+ const uint8* buffer, uint32* value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
+ // Decodes the varint at |buffer| into |*value| (low 32 bits only) and returns a pointer just past it,
+ // or NULL if it runs past kMaxVarintBytes. Unchecked fast path: the caller guarantees the read stays in bounds.
+ const uint8* ptr = buffer;
+ uint32 b;
+ uint32 result;
+
+ b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;  // fifth byte: bits that do not fit in 32 are shifted out
+
+ // The encoded value is wider than 32 bits: keep consuming its remaining
+ // bytes so the returned pointer lands after the varint; their bits are dropped.
+ for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
+ b = *(ptr++); if (!(b & 0x80)) goto done;
+ }
+
+ // We have overrun the maximum size of a varint (10 bytes). Assume
+ // the data is corrupt.
+ return NULL;  // |*value| is left unwritten on this path
+
+ done:
+ *value = result;
+ return ptr;
+}
+
+} // namespace
- uint32 part0 = (static_cast<uint32>(ptr[0]) ) |
- (static_cast<uint32>(ptr[1]) << 8) |
- (static_cast<uint32>(ptr[2]) << 16) |
- (static_cast<uint32>(ptr[3]) << 24);
- uint32 part1 = (static_cast<uint32>(ptr[4]) ) |
- (static_cast<uint32>(ptr[5]) << 8) |
- (static_cast<uint32>(ptr[6]) << 16) |
- (static_cast<uint32>(ptr[7]) << 24);
- *value = static_cast<uint64>(part0) |
- (static_cast<uint64>(part1) << 32);
+bool CodedInputStream::ReadVarint32Slow(uint32* value) {
+ uint64 result;
+ // Out-of-line slow path: decode a full 64-bit varint and truncate it to 32 bits. This calls
+ // ReadVarint64Fallback directly (not ReadVarint64) because the one-byte case was already tried.
+ if (!ReadVarint64Fallback(&result)) return false;
+ *value = (uint32)result;
return true;
}
bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
- if (buffer_size_ >= kMaxVarintBytes ||
+ if (BufferSize() >= kMaxVarintBytes ||
// Optimization: If the varint ends at exactly the end of the buffer,
// we can detect that and still use the fast path.
- (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
- // Fast path: We have enough bytes left in the buffer to guarantee that
- // this read won't cross the end, so we can skip the checks.
- const uint8* ptr = buffer_;
- uint32 b;
- uint32 result;
-
- b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;
-
- // If the input is larger than 32 bits, we still need to read it all
- // and discard the high-order bits.
- for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
- b = *(ptr++); if (!(b & 0x80)) goto done;
- }
-
- // We have overrun the maximum size of a varint (10 bytes). Assume
- // the data is corrupt.
- return false;
-
- done:
- Advance(ptr - buffer_);
- *value = result;
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {  // non-empty buffer whose last byte has no continuation bit
+ const uint8* end = ReadVarint32FromArray(buffer_, value);  // unchecked decode; in bounds per the condition above
+ if (end == NULL) return false;  // no terminating byte within kMaxVarintBytes
+ buffer_ = end;  // consume the varint
return true;
-
} else {
- // Optimization: If we're at a limit, detect that quickly. (This is
- // common when reading tags.)
- while (buffer_size_ == 0) {
- // Detect cases where we definitely hit a byte limit without calling
- // Refresh().
- if (// If we hit a limit, buffer_size_after_limit_ will be non-zero.
- buffer_size_after_limit_ > 0 &&
- // Make sure that the limit we hit is not total_bytes_limit_, since
- // in that case we still need to call Refresh() so that it prints an
- // error.
- total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
- // We hit a byte limit.
- legitimate_message_end_ = true;
- return false;
- }
+ // Really slow case: we will incur the cost of an extra function call here,
+ // but moving this out of line reduces the size of this function, which
+ // improves the common case. In micro benchmarks, this is worth about 10-15%.
+ return ReadVarint32Slow(value);
+ }
+}
- // Call refresh.
- if (!Refresh()) {
- // Refresh failed. Make sure that it failed due to EOF, not because
- // we hit total_bytes_limit_, which, unlike normal limits, is not a
- // valid place to end a message.
- int current_position = total_bytes_read_ - buffer_size_after_limit_;
- if (current_position >= total_bytes_limit_) {
- // Hit total_bytes_limit_. But if we also hit the normal limit,
- // we're still OK.
- legitimate_message_end_ = current_limit_ == total_bytes_limit_;
- } else {
- legitimate_message_end_ = true;
- }
- return false;
+uint32 CodedInputStream::ReadTagSlow() {
+ if (buffer_ == buffer_end_) {
+ // The buffer is empty: ask the input for more data before decoding the tag.
+ if (!Refresh()) {
+ // Refresh failed. Make sure that it failed due to EOF, not because
+ // we hit total_bytes_limit_, which, unlike normal limits, is not a
+ // valid place to end a message.
+ int current_position = total_bytes_read_ - buffer_size_after_limit_;
+ if (current_position >= total_bytes_limit_) {
+ // Hit total_bytes_limit_. But if we also hit the normal limit,
+ // we're still OK.
+ legitimate_message_end_ = current_limit_ == total_bytes_limit_;
+ } else {
+ legitimate_message_end_ = true;
}
+ return 0;  // no tag read; legitimate_message_end_ records whether stopping here was valid
}
+ }
- // Slow path: Just do a 64-bit read.
- uint64 result;
- if (!ReadVarint64(&result)) return false;
- *value = (uint32)result;
- return true;
+ // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags
+ // again, since we have now refreshed the buffer.
+ uint64 result;
+ if (!ReadVarint64(&result)) return 0;  // read failure is reported as tag 0
+ return static_cast<uint32>(result);  // keep only the low 32 bits
+}
+
+uint32 CodedInputStream::ReadTagFallback() {
+ if (BufferSize() >= kMaxVarintBytes ||
+ // Optimization: if the last byte in the buffer has no continuation bit, a varint starting
+ // at buffer_ must end inside the buffer, so the unchecked fast path is still safe.
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+ uint32 tag;
+ const uint8* end = ReadVarint32FromArray(buffer_, &tag);
+ if (end == NULL) {
+ return 0;  // no terminating byte within kMaxVarintBytes; failure is reported as tag 0
+ }
+ buffer_ = end;  // consume the tag bytes
+ return tag;
+ } else {
+ // We are commonly at a limit when attempting to read tags. Try to quickly
+ // detect this case without making another function call.
+ if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 &&
+ // Make sure that the limit we hit is not total_bytes_limit_, since
+ // in that case we still need to call Refresh() so that it prints an
+ // error.
+ total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
+ // We hit a byte limit.
+ legitimate_message_end_ = true;
+ return 0;
+ }
+ return ReadTagSlow();
}
}
-bool CodedInputStream::ReadVarint64(uint64* value) {
- if (buffer_size_ >= kMaxVarintBytes ||
+bool CodedInputStream::ReadVarint64Slow(uint64* value) {
+ // Slow path: decodes one byte at a time and refreshes the buffer whenever it runs
+ // empty, so the varint may cross buffer boundaries. |*value| is written only on success.
+
+ uint64 result = 0;
+ int count = 0;
+ uint32 b;
+
+ do {
+ if (count == kMaxVarintBytes) return false;  // give up once kMaxVarintBytes bytes have been consumed
+ while (buffer_ == buffer_end_) {  // buffer exhausted; a loop, presumably because Refresh() can succeed with an empty buffer
+ if (!Refresh()) return false;
+ }
+ b = *buffer_;
+ result |= static_cast<uint64>(b & 0x7F) << (7 * count);  // each byte contributes 7 payload bits, lowest group first
+ Advance(1);
+ ++count;
+ } while (b & 0x80);  // high bit set means another byte follows
+
+ *value = result;
+ return true;
+}
+
+bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
+ if (BufferSize() >= kMaxVarintBytes ||
// Optimization: If the varint ends at exactly the end of the buffer,
// we can detect that and still use the fast path.
- (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
// Fast path: We have enough bytes left in the buffer to guarantee that
// this read won't cross the end, so we can skip the checks.
@@ -442,7 +443,7 @@ bool CodedInputStream::ReadVarint64(uint64* value) {
// We have overrun the maximum size of a varint (10 bytes). The data
// must be corrupt.
- return false;
+ return NULL;
done:
Advance(ptr - buffer_);
@@ -450,33 +451,13 @@ bool CodedInputStream::ReadVarint64(uint64* value) {
(static_cast<uint64>(part1) << 28) |
(static_cast<uint64>(part2) << 56);
return true;
-
} else {
- // Slow path: This read might cross the end of the buffer, so we
- // need to check and refresh the buffer if and when it does.
-
- uint64 result = 0;
- int count = 0;
- uint32 b;
-
- do {
- if (count == kMaxVarintBytes) return false;
- while (buffer_size_ == 0) {
- if (!Refresh()) return false;
- }
- b = *buffer_;
- result |= static_cast<uint64>(b & 0x7F) << (7 * count);
- Advance(1);
- ++count;
- } while(b & 0x80);
-
- *value = result;
- return true;
+ return ReadVarint64Slow(value);
}
}
bool CodedInputStream::Refresh() {
- GOOGLE_DCHECK_EQ(buffer_size_, 0);
+ GOOGLE_DCHECK_EQ(0, BufferSize());
if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 ||
total_bytes_read_ == current_limit_) {
@@ -507,25 +488,27 @@ bool CodedInputStream::Refresh() {
}
const void* void_buffer;
- if (input_->Next(&void_buffer, &buffer_size_)) {
+ int buffer_size;
+ if (input_->Next(&void_buffer, &buffer_size)) {
buffer_ = reinterpret_cast<const uint8*>(void_buffer);
- GOOGLE_CHECK_GE(buffer_size_, 0);
+ buffer_end_ = buffer_ + buffer_size;
+ GOOGLE_CHECK_GE(buffer_size, 0);
- if (total_bytes_read_ <= INT_MAX - buffer_size_) {
- total_bytes_read_ += buffer_size_;
+ if (total_bytes_read_ <= INT_MAX - buffer_size) {
+ total_bytes_read_ += buffer_size;
} else {
- // Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX.
+ // Overflow. Reset buffer_end_ to not include the bytes beyond INT_MAX.
// We can't get that far anyway, because total_bytes_limit_ is guaranteed
// to be less than it. We need to keep track of the number of bytes
// we discarded, though, so that we can call input_->BackUp() to back
// up over them on destruction.
// The following line is equivalent to:
- // overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX;
+ // overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX;
// except that it avoids overflows. Signed integer overflow has
// undefined results according to the C standard.
- overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_);
- buffer_size_ -= overflow_bytes_;
+ overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size);
+ buffer_end_ -= overflow_bytes_;
total_bytes_read_ = INT_MAX;
}
@@ -533,7 +516,7 @@ bool CodedInputStream::Refresh() {
return true;
} else {
buffer_ = NULL;
- buffer_size_ = 0;
+ buffer_end_ = NULL;
return false;
}
}
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index fa023f35..dcbb0d45 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -114,10 +114,15 @@
#include <sys/param.h>
#endif // !_MSC_VER
#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro
namespace google {
namespace protobuf {
+
+class DescriptorPool;
+class MessageFactory;
+
namespace io {
// Defined in this file.
@@ -166,6 +171,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// types of data not covered by the CodedInputStream interface.
bool GetDirectBufferPointer(const void** data, int* size);
+ // Like GetDirectBufferPointer, but this method is inlined, and does not
+ // attempt to Refresh() if the buffer is currently empty.
+ inline void GetDirectBufferPointerInline(const void** data,
+ int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
// Read raw bytes, copying them into the given buffer.
bool ReadRaw(void* buffer, int size);
@@ -177,6 +187,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// could claim that a string is going to be MAX_INT bytes long in order to
// crash the server because it can't allocate this much space at once.
bool ReadString(string* buffer, int size);
+ // Like the above, with inlined optimizations. This should only be used
+ // by the protobuf implementation.
+ inline bool InternalReadStringInline(string* buffer,
+ int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
// Read a 32-bit little-endian integer.
@@ -184,6 +198,15 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Read a 64-bit little-endian integer.
bool ReadLittleEndian64(uint64* value);
+ // These methods read from an externally provided buffer. The caller is
+ // responsible for ensuring that the buffer has sufficient space.
+ // Read a 32-bit little-endian integer.
+ static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
+ uint32* value);
+ // Read a 64-bit little-endian integer.
+ static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
+ uint64* value);
+
// Read an unsigned integer with Varint encoding, truncating to 32 bits.
// Reading a 32-bit value is equivalent to reading a 64-bit one and casting
// it to uint32, but may be more efficient.
@@ -208,6 +231,17 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// when given a constant parameter, but GCC doesn't want to inline by default.
bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+ // Like above, except this reads from the specified buffer. The caller is
+ // responsible for ensuring that the buffer is large enough to read a varint
+ // of the expected size. For best performance, use a compile-time constant as
+ // the expected tag parameter.
+ //
+ // Returns a pointer beyond the expected tag if it was found, or NULL if it
+ // was not.
+ static const uint8* ExpectTagFromArray(
+ const uint8* buffer,
+ uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
// Usually returns true if no more bytes can be read. Always returns false
// if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
// call to LastTagWas() will act as if ReadTag() had been called and returned
@@ -318,12 +352,90 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Decrements the recursion depth.
void DecrementRecursionDepth();
+ // Extension Registry ----------------------------------------------
+ // ADVANCED USAGE: 99.9% of people can ignore this section.
+ //
+ // By default, when parsing extensions, the parser looks for extension
+ // definitions in the pool which owns the outer message's Descriptor.
+ // However, you may call SetExtensionRegistry() to provide an alternative
+ // pool instead. This makes it possible, for example, to parse a message
+ // using a generated class, but represent some extensions using
+ // DynamicMessage.
+
+ // Set the pool used to look up extensions. Most users do not need to call
+ // this as the correct pool will be chosen automatically.
+ //
+ // WARNING: It is very easy to misuse this. Carefully read the requirements
+ // below. Do not use this unless you are sure you need it. Almost no one
+ // does.
+ //
+ // Let's say you are parsing a message into message object m, and you want
+ // to take advantage of SetExtensionRegistry(). You must follow these
+ // requirements:
+ //
+ // The given DescriptorPool must contain m->GetDescriptor(). It is not
+ // sufficient for it to simply contain a descriptor that has the same name
+ // and content -- it must be the *exact object*. In other words:
+ // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
+ // m->GetDescriptor());
+ // There are two ways to satisfy this requirement:
+ // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
+ // because this is the pool that would be used anyway if you didn't call
+ // SetExtensionRegistry() at all.
+ // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
+ // "underlay". Read the documentation for DescriptorPool for more
+ // information about underlays.
+ //
+ // You must also provide a MessageFactory. This factory will be used to
+ // construct Message objects representing extensions. The factory's
+ // GetPrototype() MUST return non-NULL for any Descriptor which can be found
+ // through the provided pool.
+ //
+ // If the provided factory might return instances of protocol-compiler-
+ // generated (i.e. compiled-in) types, or if the outer message object m is
+ // a generated type, then the given factory MUST have this property: If
+ // GetPrototype() is given a Descriptor which resides in
+ // DescriptorPool::generated_pool(), the factory MUST return the same
+ // prototype which MessageFactory::generated_factory() would return. That
+ // is, given a descriptor for a generated type, the factory must return an
+ // instance of the generated class (NOT DynamicMessage). However, when
+ // given a descriptor for a type that is NOT in generated_pool, the factory
+ // is free to return any implementation.
+ //
+ // The reason for this requirement is that generated sub-objects may be
+ // accessed via the standard (non-reflection) extension accessor methods,
+ // and these methods will down-cast the object to the generated class type.
+ // If the object is not actually of that type, the results would be undefined.
+ // On the other hand, if an extension is not compiled in, then there is no
+ // way the code could end up accessing it via the standard accessors -- the
+ // only way to access the extension is via reflection. When using reflection,
+ // DynamicMessage and generated messages are indistinguishable, so it's fine
+ // if these objects are represented using DynamicMessage.
+ //
+ // Using DynamicMessageFactory on which you have called
+ // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
+ // above requirement.
+ //
+ // If either pool or factory is NULL, both must be NULL.
+ //
+ // Note that this feature is ignored when parsing "lite" messages as they do
+ // not have descriptors.
+ void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
+
+ // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
+ // has been provided.
+ const DescriptorPool* GetExtensionPool();
+
+ // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
+ // factory has been provided.
+ MessageFactory* GetExtensionFactory();
+
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
ZeroCopyInputStream* input_;
const uint8* buffer_;
- int buffer_size_; // size of current buffer
+ const uint8* buffer_end_; // pointer to the end of the buffer.
int total_bytes_read_; // total bytes read from input_, including
// the current buffer
@@ -334,7 +446,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// LastTagWas() stuff.
uint32 last_tag_; // result of last ReadTag().
- // This is set true by ReadVarint32Fallback() if it is called when exactly
+ // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
// at EOF, or by ExpectAtEnd() when it returns true. This happens when we
// reach the end of a message and attempt to read another tag.
bool legitimate_message_end_;
@@ -365,6 +477,12 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Recursion depth limit, set by SetRecursionLimit().
int recursion_limit_;
+ // See SetExtensionRegistry().
+ const DescriptorPool* extension_pool_;
+ MessageFactory* extension_factory_;
+
+ // Private member functions.
+
// Advance the buffer by a given number of bytes.
void Advance(int amount);
@@ -379,10 +497,36 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
void PrintTotalBytesLimitError();
// Called when the buffer runs out to request more data. Implies an
- // Advance(buffer_size_).
+ // Advance(BufferSize()).
bool Refresh();
+ // When parsing varints, we optimize for the common case of small values, and
+ // then optimize for the case when the varint fits within the current buffer
+ // piece. The Fallback method is used when we can't use the one-byte
+ // optimization. The Slow method is yet another fallback when the buffer is
+ // not large enough. Making the slow path out-of-line speeds up the common
+ // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
+ // message crosses multiple buffers.
bool ReadVarint32Fallback(uint32* value);
+ bool ReadVarint64Fallback(uint64* value);
+ bool ReadVarint32Slow(uint32* value);
+ bool ReadVarint64Slow(uint64* value);
+ bool ReadLittleEndian32Fallback(uint32* value);
+ bool ReadLittleEndian64Fallback(uint64* value);
+ // Fallback/slow methods for reading tags. These do not update last_tag_,
+ // but will set legitimate_message_end_ if we are at the end of the input
+ // stream.
+ uint32 ReadTagFallback();
+ uint32 ReadTagSlow();
+ bool ReadStringFallback(string* buffer, int size);
+
+ // Return the size of the buffer.
+ int BufferSize() const;
+
+ static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
+
+ static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
+ static const int kDefaultRecursionLimit = 64;
};
// Class which encodes and writes binary data which is composed of varint-
@@ -568,7 +712,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// methods optimize for that case.
inline bool CodedInputStream::ReadVarint32(uint32* value) {
- if (buffer_size_ != 0 && *buffer_ < 0x80) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
*value = *buffer_;
Advance(1);
return true;
@@ -577,20 +721,93 @@ inline bool CodedInputStream::ReadVarint32(uint32* value) {
}
}
+inline bool CodedInputStream::ReadVarint64(uint64* value) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
+ *value = *buffer_;
+ Advance(1);
+ return true;
+ } else {
+ return ReadVarint64Fallback(value);
+ }
+}
+
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
+ const uint8* buffer,
+ uint32* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(value, buffer, sizeof(*value));
+ return buffer + sizeof(*value);
+#else
+ *value = (static_cast<uint32>(buffer[0]) ) |
+ (static_cast<uint32>(buffer[1]) << 8) |
+ (static_cast<uint32>(buffer[2]) << 16) |
+ (static_cast<uint32>(buffer[3]) << 24);
+ return buffer + sizeof(*value);
+#endif
+}
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
+ const uint8* buffer,
+ uint64* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(value, buffer, sizeof(*value));
+ return buffer + sizeof(*value);
+#else
+ uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
+ (static_cast<uint32>(buffer[1]) << 8) |
+ (static_cast<uint32>(buffer[2]) << 16) |
+ (static_cast<uint32>(buffer[3]) << 24);
+ uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
+ (static_cast<uint32>(buffer[5]) << 8) |
+ (static_cast<uint32>(buffer[6]) << 16) |
+ (static_cast<uint32>(buffer[7]) << 24);
+ *value = static_cast<uint64>(part0) |
+ (static_cast<uint64>(part1) << 32);
+ return buffer + sizeof(*value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+ memcpy(value, buffer_, sizeof(*value));
+ Advance(sizeof(*value));
+ return true;
+ } else {
+ return ReadLittleEndian32Fallback(value);
+ }
+#else
+ return ReadLittleEndian32Fallback(value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+ memcpy(value, buffer_, sizeof(*value));
+ Advance(sizeof(*value));
+ return true;
+ } else {
+ return ReadLittleEndian64Fallback(value);
+ }
+#else
+ return ReadLittleEndian64Fallback(value);
+#endif
+}
+
inline uint32 CodedInputStream::ReadTag() {
- if (buffer_size_ != 0 && buffer_[0] < 0x80) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
last_tag_ = buffer_[0];
Advance(1);
return last_tag_;
- } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
- last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
- Advance(2);
- return last_tag_;
- } else if (ReadVarint32Fallback(&last_tag_)) {
- return last_tag_;
} else {
- last_tag_ = 0;
- return 0;
+ last_tag_ = ReadTagFallback();
+ return last_tag_;
}
}
@@ -604,14 +821,14 @@ inline bool CodedInputStream::ConsumedEntireMessage() {
inline bool CodedInputStream::ExpectTag(uint32 expected) {
if (expected < (1 << 7)) {
- if (buffer_size_ != 0 && buffer_[0] == expected) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
Advance(1);
return true;
} else {
return false;
}
} else if (expected < (1 << 14)) {
- if (buffer_size_ >= 2 &&
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
buffer_[0] == static_cast<uint8>(expected | 0x80) &&
buffer_[1] == static_cast<uint8>(expected >> 7)) {
Advance(2);
@@ -625,11 +842,32 @@ inline bool CodedInputStream::ExpectTag(uint32 expected) {
}
}
+inline const uint8* CodedInputStream::ExpectTagFromArray(
+ const uint8* buffer, uint32 expected) {
+ if (expected < (1 << 7)) {
+ if (buffer[0] == expected) {
+ return buffer + 1;
+ }
+ } else if (expected < (1 << 14)) {
+ if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
+ buffer[1] == static_cast<uint8>(expected >> 7)) {
+ return buffer + 2;
+ }
+ }
+ return NULL;
+}
+
+inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
+ int* size) {
+ *data = buffer_;
+ *size = buffer_end_ - buffer_;
+}
+
inline bool CodedInputStream::ExpectAtEnd() {
// If we are at a limit we know no more bytes can be read. Otherwise, it's
// hard to say without calling Refresh(), and we'd rather not do that.
- if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
+ if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
last_tag_ = 0; // Pretend we called ReadTag()...
legitimate_message_end_ = true; // ... and it hit EOF.
return true;
@@ -677,11 +915,11 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
uint8* target) {
-#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(target, &value, sizeof(value));
#else
- target[0] = static_cast<uint8>(value );
+ target[0] = static_cast<uint8>(value);
target[1] = static_cast<uint8>(value >> 8);
target[2] = static_cast<uint8>(value >> 16);
target[3] = static_cast<uint8>(value >> 24);
@@ -691,18 +929,18 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
uint8* target) {
-#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(target, &value, sizeof(value));
#else
uint32 part0 = static_cast<uint32>(value);
uint32 part1 = static_cast<uint32>(value >> 32);
- target[0] = static_cast<uint8>(part0 );
+ target[0] = static_cast<uint8>(part0);
target[1] = static_cast<uint8>(part0 >> 8);
target[2] = static_cast<uint8>(part0 >> 16);
target[3] = static_cast<uint8>(part0 >> 24);
- target[4] = static_cast<uint8>(part1 );
+ target[4] = static_cast<uint8>(part1);
target[5] = static_cast<uint8>(part1 >> 8);
target[6] = static_cast<uint8>(part1 >> 16);
target[7] = static_cast<uint8>(part1 >> 24);
@@ -759,7 +997,6 @@ inline int CodedOutputStream::ByteCount() const {
inline void CodedInputStream::Advance(int amount) {
buffer_ += amount;
- buffer_size_ -= amount;
}
inline void CodedOutputStream::Advance(int amount) {
@@ -780,6 +1017,72 @@ inline void CodedInputStream::DecrementRecursionDepth() {
if (recursion_depth_ > 0) --recursion_depth_;
}
+inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
+ MessageFactory* factory) {
+ extension_pool_ = pool;
+ extension_factory_ = factory;
+}
+
+inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
+ return extension_pool_;
+}
+
+inline MessageFactory* CodedInputStream::GetExtensionFactory() {
+ return extension_factory_;
+}
+
+inline int CodedInputStream::BufferSize() const {
+ return buffer_end_ - buffer_;
+}
+
+inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
+ : input_(input),
+ buffer_(NULL),
+ buffer_end_(NULL),
+ total_bytes_read_(0),
+ overflow_bytes_(0),
+ last_tag_(0),
+ legitimate_message_end_(false),
+ aliasing_enabled_(false),
+ current_limit_(INT_MAX),
+ buffer_size_after_limit_(0),
+ total_bytes_limit_(kDefaultTotalBytesLimit),
+ total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+ recursion_depth_(0),
+ recursion_limit_(kDefaultRecursionLimit),
+ extension_pool_(NULL),
+ extension_factory_(NULL) {
+ // Eagerly Refresh() so buffer space is immediately available.
+ Refresh();
+}
+
+inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
+ : input_(NULL),
+ buffer_(buffer),
+ buffer_end_(buffer + size),
+ total_bytes_read_(size),
+ overflow_bytes_(0),
+ last_tag_(0),
+ legitimate_message_end_(false),
+ aliasing_enabled_(false),
+ current_limit_(size),
+ buffer_size_after_limit_(0),
+ total_bytes_limit_(kDefaultTotalBytesLimit),
+ total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+ recursion_depth_(0),
+ recursion_limit_(kDefaultRecursionLimit),
+ extension_pool_(NULL),
+ extension_factory_(NULL) {
+ // Note that setting current_limit_ == size is important to prevent some
+ // code paths from trying to access input_ and segfaulting.
+}
+
+inline CodedInputStream::~CodedInputStream() {
+ if (input_ != NULL) {
+ BackUpInputToCurrentPosition();
+ }
+}
+
} // namespace io
} // namespace protobuf
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index e165fb93..7d298332 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -242,6 +242,24 @@ TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
}
}
+TEST_1D(CodedStreamTest, ExpectTagFromArray, kVarintCases) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+
+ const uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+
+ // If the expectation succeeds, it should return a pointer past the tag.
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_TRUE(NULL ==
+ CodedInputStream::ExpectTagFromArray(buffer_,
+ expected_value + 1));
+ EXPECT_TRUE(buffer_ + kVarintCases_case.size ==
+ CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+ } else {
+ EXPECT_TRUE(NULL ==
+ CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+ }
+}
+
TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) {
memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
@@ -529,10 +547,32 @@ TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) {
EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64)));
}
+// Tests using the static methods to read fixed-size values from raw arrays.
+
+TEST_1D(CodedStreamTest, ReadLittleEndian32FromArray, kFixed32Cases) {
+ memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes));
+
+ uint32 value;
+ const uint8* end = CodedInputStream::ReadLittleEndian32FromArray(
+ buffer_, &value);
+ EXPECT_EQ(kFixed32Cases_case.value, value);
+ EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
+TEST_1D(CodedStreamTest, ReadLittleEndian64FromArray, kFixed64Cases) {
+ memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes));
+
+ uint64 value;
+ const uint8* end = CodedInputStream::ReadLittleEndian64FromArray(
+ buffer_, &value);
+ EXPECT_EQ(kFixed64Cases_case.value, value);
+ EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
// -------------------------------------------------------------------
// Raw reads and writes
-const char kRawBytes[] = "Some bytes which will be writted and read raw.";
+const char kRawBytes[] = "Some bytes which will be written and read raw.";
TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) {
memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
@@ -593,6 +633,22 @@ TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) {
}
}
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) {
+ // Same test as above, except directly use a buffer. This used to cause
+ // crashes while the above did not.
+ uint8 buffer[8];
+ CodedInputStream coded_input(buffer, 8);
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
+ scoped_array<uint8> buffer(new uint8[8]);
+ CodedInputStream coded_input(buffer.get(), 8);
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
// -------------------------------------------------------------------
// Skip
@@ -652,6 +708,36 @@ TEST_F(CodedStreamTest, GetDirectBufferPointerInput) {
EXPECT_EQ(8, size);
}
+TEST_F(CodedStreamTest, GetDirectBufferPointerInlineInput) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), 8);
+ CodedInputStream coded_input(&input);
+
+ const void* ptr;
+ int size;
+
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Peeking again should return the same pointer.
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Skip forward in the same buffer then peek again.
+ EXPECT_TRUE(coded_input.Skip(3));
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_ + 3, ptr);
+ EXPECT_EQ(5, size);
+
+ // Skip to end of buffer and peek -- should provide an empty buffer.
+ // It does not try to Refresh().
+ EXPECT_TRUE(coded_input.Skip(5));
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_ + 8, ptr);
+ EXPECT_EQ(0, size);
+}
+
TEST_F(CodedStreamTest, GetDirectBufferPointerOutput) {
ArrayOutputStream output(buffer_, sizeof(buffer_), 8);
CodedOutputStream coded_output(&output);
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
index 84d277f4..e1a35ea2 100644
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -315,6 +315,6 @@ bool GzipOutputStream::Close() {
} // namespace io
} // namespace protobuf
-} // namespace google
#endif // HAVE_ZLIB
+} // namespace google
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
index 937d777e..c7d3074d 100644
--- a/src/google/protobuf/io/printer.cc
+++ b/src/google/protobuf/io/printer.cc
@@ -65,10 +65,10 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
if (text[i] == '\n') {
// Saw newline. If there is more text, we may need to insert an indent
// here. So, write what we have so far, including the '\n'.
- Write(text + pos, i - pos + 1);
+ WriteRaw(text + pos, i - pos + 1);
pos = i + 1;
- // Setting this true will cause the next Write() to insert an indent
+ // Setting this true will cause the next WriteRaw() to insert an indent
// first.
at_start_of_line_ = true;
@@ -76,7 +76,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
// Saw the start of a variable name.
// Write what we have so far.
- Write(text + pos, i - pos);
+ WriteRaw(text + pos, i - pos);
pos = i + 1;
// Find closing delimiter.
@@ -90,14 +90,14 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
string varname(text + pos, endpos - pos);
if (varname.empty()) {
// Two delimiters in a row reduce to a literal delimiter character.
- Write(&variable_delimiter_, 1);
+ WriteRaw(&variable_delimiter_, 1);
} else {
// Replace with the variable's value.
map<string, string>::const_iterator iter = variables.find(varname);
if (iter == variables.end()) {
GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname;
} else {
- Write(iter->second.data(), iter->second.size());
+ WriteRaw(iter->second.data(), iter->second.size());
}
}
@@ -108,7 +108,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) {
}
// Write the rest.
- Write(text + pos, size - pos);
+ WriteRaw(text + pos, size - pos);
}
void Printer::Print(const char* text) {
@@ -145,14 +145,23 @@ void Printer::Outdent() {
indent_.resize(indent_.size() - 2);
}
-void Printer::Write(const char* data, int size) {
+void Printer::PrintRaw(const string& data) {
+ WriteRaw(data.data(), data.size());
+}
+
+void Printer::PrintRaw(const char* data) {
+ if (failed_) return;
+ WriteRaw(data, strlen(data));
+}
+
+void Printer::WriteRaw(const char* data, int size) {
if (failed_) return;
if (size == 0) return;
if (at_start_of_line_) {
// Insert an indent.
at_start_of_line_ = false;
- Write(indent_.data(), indent_.size());
+ WriteRaw(indent_.data(), indent_.size());
if (failed_) return;
}
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
index b7c4cf39..de085389 100644
--- a/src/google/protobuf/io/printer.h
+++ b/src/google/protobuf/io/printer.h
@@ -59,8 +59,8 @@ class ZeroCopyOutputStream; // zero_copy_stream.h
// The above writes "My name is Bob." to the output stream.
//
// Printer aggressively enforces correct usage, crashing (with assert failures)
-// in the case of undefined variables. This helps greatly in debugging code
-// which uses it. This class is not intended to be used by production servers.
+// in the case of undefined variables in debug builds. This helps greatly in
+// debugging code which uses it.
class LIBPROTOBUF_EXPORT Printer {
public:
// Create a printer that writes text to the given output stream. Use the
@@ -94,15 +94,24 @@ class LIBPROTOBUF_EXPORT Printer {
// level is zero.
void Outdent();
+ // Write a string to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void PrintRaw(const string& data);
+
+ // Write a zero-terminated string to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void PrintRaw(const char* data);
+
+ // Write some bytes to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void WriteRaw(const char* data, int size);
+
// True if any write to the underlying stream failed. (We don't just
// crash in this case because this is an I/O failure, not a programming
// error.)
bool failed() const { return failed_; }
private:
- // Write some text to the output buffer.
- void Write(const char* data, int size);
-
const char variable_delimiter_;
ZeroCopyOutputStream* const output_;
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index 69c7ee34..580a53da 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -76,10 +76,38 @@ TEST(Printer, BasicPrinting) {
buffer[output.ByteCount()] = '\0';
- EXPECT_STREQ(buffer,
- "Hello World! This is the same line.\n"
- "But this is a new one.\n"
- "And this is another one.");
+ EXPECT_STREQ("Hello World! This is the same line.\n"
+ "But this is a new one.\n"
+ "And this is another one.",
+ buffer);
+ }
+}
+
+TEST(Printer, WriteRaw) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ string string_obj = "From an object\n";
+ Printer printer(&output, '$');
+ printer.WriteRaw("Hello World!", 12);
+ printer.PrintRaw(" This is the same line.\n");
+ printer.PrintRaw("But this is a new one.\nAnd this is another one.");
+ printer.WriteRaw("\n", 1);
+ printer.PrintRaw(string_obj);
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ("Hello World! This is the same line.\n"
+ "But this is a new one.\n"
+ "And this is another one."
+ "\n"
+ "From an object\n",
+ buffer);
}
}
@@ -98,6 +126,7 @@ TEST(Printer, VariableSubstitution) {
vars["abcdefg"] = "1234";
printer.Print(vars, "Hello $foo$!\nbar = $bar$\n");
+ printer.PrintRaw("RawBit\n");
printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$");
vars["foo"] = "blah";
@@ -108,12 +137,13 @@ TEST(Printer, VariableSubstitution) {
buffer[output.ByteCount()] = '\0';
- EXPECT_STREQ(buffer,
- "Hello World!\n"
- "bar = $foo$\n"
- "1234\n"
- "A literal dollar sign: $\n"
- "Now foo = blah.");
+ EXPECT_STREQ("Hello World!\n"
+ "bar = $foo$\n"
+ "RawBit\n"
+ "1234\n"
+ "A literal dollar sign: $\n"
+ "Now foo = blah.",
+ buffer);
}
}
@@ -125,15 +155,17 @@ TEST(Printer, InlineVariableSubstitution) {
{
Printer printer(&output, '$');
printer.Print("Hello $foo$!\n", "foo", "World");
+ printer.PrintRaw("RawBit\n");
printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two");
EXPECT_FALSE(printer.failed());
}
buffer[output.ByteCount()] = '\0';
- EXPECT_STREQ(buffer,
- "Hello World!\n"
- "one two\n");
+ EXPECT_STREQ("Hello World!\n"
+ "RawBit\n"
+ "one two\n",
+ buffer);
}
TEST(Printer, Indenting) {
@@ -156,6 +188,8 @@ TEST(Printer, Indenting) {
printer.Indent();
printer.Print(" And this is still the same line.\n"
"But this is indented.\n");
+ printer.PrintRaw("RawBit has indent at start\n");
+ printer.PrintRaw("but not after a raw newline\n");
printer.Print(vars, "Note that a newline in a variable will break "
"indenting, as we see$newline$here.\n");
printer.Indent();
@@ -169,16 +203,19 @@ TEST(Printer, Indenting) {
buffer[output.ByteCount()] = '\0';
- EXPECT_STREQ(buffer,
+ EXPECT_STREQ(
"This is not indented.\n"
" This is indented\n"
" And so is this\n"
"But this is not. And this is still the same line.\n"
" But this is indented.\n"
- " Note that a newline in a variable will break indenting, as we see\n"
+ " RawBit has indent at start\n"
+ "but not after a raw newline\n"
+ "Note that a newline in a variable will break indenting, as we see\n"
"here.\n"
" And this is double-indented\n"
- "Back to normal.");
+ "Back to normal.",
+ buffer);
}
}
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index 0bda451b..75cbfed5 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -119,7 +119,7 @@ namespace {
CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
c == '\r' || c == '\v');
-CHARACTER_CLASS(Unprintable, c < ' ' && c != '\0');
+CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 98386e0b..d115161f 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -63,6 +63,11 @@ class LIBPROTOBUF_EXPORT ErrorCollector {
// 1 to each before printing them.
virtual void AddError(int line, int column, const string& message) = 0;
+ // Indicates that there was a warning in the input at the given line and
+ // column numbers. The numbers are zero-based, so you may want to add
+ // 1 to each before printing them.
+ virtual void AddWarning(int line, int column, const string& message) { }
+
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
};
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index eac1455d..3598e188 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -397,6 +397,12 @@ MultiTokenCase kMultiTokenCases[] = {
{ Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 },
{ Tokenizer::TYPE_END , "" , 1, 3 },
}},
+
+ // Bytes with the high-order bit set should not be seen as control characters.
+ { "\300", {
+ { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 },
+ { Tokenizer::TYPE_END , "" , 0, 1 },
+ }},
};
TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 64e96cd4..9fedb005 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -133,10 +133,11 @@ class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream {
// A ZeroCopyOutputStream which writes to a file descriptor.
//
-// FileInputStream is preferred over using an ofstream with OstreamOutputStream.
-// The latter will introduce an extra layer of buffering, harming performance.
-// Also, it's conceivable that FileInputStream could someday be enhanced
-// to use zero-copy file descriptors on OSs which support them.
+// FileOutputStream is preferred over using an ofstream with
+// OstreamOutputStream. The latter will introduce an extra layer of buffering,
+// harming performance. Also, it's conceivable that FileOutputStream could
+// someday be enhanced to use zero-copy file descriptors on OSs which
+// support them.
class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream {
public:
// Creates a stream that writes to the given Unix file descriptor.
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index f919b7ac..8229ee6d 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -403,7 +403,8 @@ TEST_F(IoTest, CompressionOptions) {
string golden;
File::ReadFileToStringOrDie(
- TestSourceDir() + "/google/protobuf/testdata/golden_message", &golden);
+ TestSourceDir() + "/google/protobuf/testdata/golden_message",
+ &golden);
GzipOutputStream::Options options;
string gzip_compressed = Compress(golden, options);