diff options
author | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2009-12-18 02:11:36 +0000 |
---|---|---|
committer | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2009-12-18 02:11:36 +0000 |
commit | fccb146e3fe437b0df1e9c50d4b8e1080ddb4bd9 (patch) | |
tree | 9f2d9fe0267d96a54e541377ffeada3d0bff0d1d /src/google/protobuf/io | |
parent | d5cf7b55a6a1f959d1646785f63ca2b62da78079 (diff) |
Massive roll-up of changes. See CHANGES.txt.
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r-- | src/google/protobuf/io/coded_stream.cc | 379 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream.h | 347 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream_unittest.cc | 88 | ||||
-rw-r--r-- | src/google/protobuf/io/gzip_stream.cc | 2 | ||||
-rw-r--r-- | src/google/protobuf/io/printer.cc | 25 | ||||
-rw-r--r-- | src/google/protobuf/io/printer.h | 19 | ||||
-rw-r--r-- | src/google/protobuf/io/printer_unittest.cc | 69 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.cc | 2 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.h | 5 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer_unittest.cc | 6 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl.h | 9 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_unittest.cc | 3 |
12 files changed, 697 insertions, 257 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc index e17a4775..6a91a13d 100644 --- a/src/google/protobuf/io/coded_stream.cc +++ b/src/google/protobuf/io/coded_stream.cc @@ -38,9 +38,9 @@ // will not cross the end of the buffer, since we can avoid a lot // of branching in this case. -#include <stack> +#include <google/protobuf/io/coded_stream_inl.h> +#include <algorithm> #include <limits.h> -#include <google/protobuf/io/coded_stream.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> #include <google/protobuf/stubs/stl_util-inl.h> @@ -52,11 +52,6 @@ namespace io { namespace { -static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB - -static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB -static const int kDefaultRecursionLimit = 64; - static const int kMaxVarintBytes = 10; static const int kMaxVarint32Bytes = 5; @@ -65,72 +60,28 @@ static const int kMaxVarint32Bytes = 5; // CodedInputStream ================================================== -CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) - : input_(input), - buffer_(NULL), - buffer_size_(0), - total_bytes_read_(0), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(INT_MAX), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), - recursion_depth_(0), - recursion_limit_(kDefaultRecursionLimit) { - // Eagerly Refresh() so buffer space is immediately available. 
- Refresh(); -} - -CodedInputStream::CodedInputStream(const uint8* buffer, int size) - : input_(NULL), - buffer_(buffer), - buffer_size_(size), - total_bytes_read_(size), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(size), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), - recursion_depth_(0), - recursion_limit_(kDefaultRecursionLimit) { - // Note that setting current_limit_ == size is important to prevent some - // code paths from trying to access input_ and segfaulting. -} - -CodedInputStream::~CodedInputStream() { - if (input_ != NULL) { - BackUpInputToCurrentPosition(); - } -} - void CodedInputStream::BackUpInputToCurrentPosition() { - int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_; + int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; if (backup_bytes > 0) { input_->BackUp(backup_bytes); // total_bytes_read_ doesn't include overflow_bytes_. - total_bytes_read_ -= buffer_size_ + buffer_size_after_limit_; - buffer_size_ = 0; + total_bytes_read_ -= BufferSize() + buffer_size_after_limit_; + buffer_end_ = buffer_; buffer_size_after_limit_ = 0; overflow_bytes_ = 0; } } inline void CodedInputStream::RecomputeBufferLimits() { - buffer_size_ += buffer_size_after_limit_; + buffer_end_ += buffer_size_after_limit_; int closest_limit = min(current_limit_, total_bytes_limit_); if (closest_limit < total_bytes_read_) { // The limit position is in the current buffer. We must adjust // the buffer size accordingly. 
buffer_size_after_limit_ = total_bytes_read_ - closest_limit; - buffer_size_ -= buffer_size_after_limit_; + buffer_end_ -= buffer_size_after_limit_; } else { buffer_size_after_limit_ = 0; } @@ -139,7 +90,7 @@ inline void CodedInputStream::RecomputeBufferLimits() { CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { // Current position relative to the beginning of the stream. int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); Limit old_limit = current_limit_; @@ -176,7 +127,7 @@ void CodedInputStream::PopLimit(Limit limit) { int CodedInputStream::BytesUntilLimit() { if (current_limit_ == INT_MAX) return -1; int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); return current_limit_ - current_position; } @@ -186,7 +137,7 @@ void CodedInputStream::SetTotalBytesLimit( // Make sure the limit isn't already past, since this could confuse other // code. int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); total_bytes_limit_ = max(current_position, total_bytes_limit); total_bytes_warning_threshold_ = warning_threshold; RecomputeBufferLimits(); @@ -203,7 +154,9 @@ void CodedInputStream::PrintTotalBytesLimitError() { bool CodedInputStream::Skip(int count) { if (count < 0) return false; // security: count is often user-supplied - if (count <= buffer_size_) { + const int original_buffer_size = BufferSize(); + + if (count <= original_buffer_size) { // Just skipping within the current buffer. Easy. Advance(count); return true; @@ -211,13 +164,13 @@ bool CodedInputStream::Skip(int count) { if (buffer_size_after_limit_ > 0) { // We hit a limit inside this buffer. Advance to the limit and fail. 
- Advance(buffer_size_); + Advance(original_buffer_size); return false; } - count -= buffer_size_; + count -= original_buffer_size; buffer_ = NULL; - buffer_size_ = 0; + buffer_end_ = buffer_; // Make sure this skip doesn't try to skip past the current limit. int closest_limit = min(current_limit_, total_bytes_limit_); @@ -236,20 +189,21 @@ bool CodedInputStream::Skip(int count) { } bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) { - if (buffer_size_ == 0 && !Refresh()) return false; + if (BufferSize() == 0 && !Refresh()) return false; *data = buffer_; - *size = buffer_size_; + *size = BufferSize(); return true; } bool CodedInputStream::ReadRaw(void* buffer, int size) { - while (buffer_size_ < size) { + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { // Reading past end of buffer. Copy what we have, then refresh. - memcpy(buffer, buffer_, buffer_size_); - buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_; - size -= buffer_size_; - Advance(buffer_size_); + memcpy(buffer, buffer_, current_buffer_size); + buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size; + size -= current_buffer_size; + Advance(current_buffer_size); if (!Refresh()) return false; } @@ -261,27 +215,25 @@ bool CodedInputStream::ReadRaw(void* buffer, int size) { bool CodedInputStream::ReadString(string* buffer, int size) { if (size < 0) return false; // security: size is often user-supplied + return InternalReadStringInline(buffer, size); +} +bool CodedInputStream::ReadStringFallback(string* buffer, int size) { if (!buffer->empty()) { buffer->clear(); } - if (size < buffer_size_) { - STLStringResizeUninitialized(buffer, size); - memcpy((uint8*)buffer->data(), buffer_, size); - Advance(size); - return true; - } - - while (buffer_size_ < size) { + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). 
- if (buffer_size_ != 0) { + if (current_buffer_size != 0) { // Note: string1.append(string2) is O(string2.size()) (as opposed to // O(string1.size() + string2.size()), which would be bad). - buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_); + buffer->append(reinterpret_cast<const char*>(buffer_), + current_buffer_size); } - size -= buffer_size_; - Advance(buffer_size_); + size -= current_buffer_size; + Advance(current_buffer_size); if (!Refresh()) return false; } @@ -292,11 +244,11 @@ bool CodedInputStream::ReadString(string* buffer, int size) { } -bool CodedInputStream::ReadLittleEndian32(uint32* value) { +bool CodedInputStream::ReadLittleEndian32Fallback(uint32* value) { uint8 bytes[sizeof(*value)]; const uint8* ptr; - if (buffer_size_ >= sizeof(*value)) { + if (BufferSize() >= sizeof(*value)) { // Fast path: Enough bytes in the buffer to read directly. ptr = buffer_; Advance(sizeof(*value)); @@ -305,19 +257,15 @@ bool CodedInputStream::ReadLittleEndian32(uint32* value) { if (!ReadRaw(bytes, sizeof(*value))) return false; ptr = bytes; } - - *value = (static_cast<uint32>(ptr[0]) ) | - (static_cast<uint32>(ptr[1]) << 8) | - (static_cast<uint32>(ptr[2]) << 16) | - (static_cast<uint32>(ptr[3]) << 24); + ReadLittleEndian32FromArray(ptr, value); return true; } -bool CodedInputStream::ReadLittleEndian64(uint64* value) { +bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) { uint8 bytes[sizeof(*value)]; const uint8* ptr; - if (buffer_size_ >= sizeof(*value)) { + if (BufferSize() >= sizeof(*value)) { // Fast path: Enough bytes in the buffer to read directly. 
ptr = buffer_; Advance(sizeof(*value)); @@ -326,99 +274,152 @@ bool CodedInputStream::ReadLittleEndian64(uint64* value) { if (!ReadRaw(bytes, sizeof(*value))) return false; ptr = bytes; } + ReadLittleEndian64FromArray(ptr, value); + return true; +} + +namespace { + +inline const uint8* ReadVarint32FromArray( + const uint8* buffer, uint32* value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; +inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + const uint8* ptr = buffer; + uint32 b; + uint32 result; + + b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { + b = *(ptr++); if (!(b & 0x80)) goto done; + } + + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return NULL; + + done: + *value = result; + return ptr; +} + +} // namespace - uint32 part0 = (static_cast<uint32>(ptr[0]) ) | - (static_cast<uint32>(ptr[1]) << 8) | - (static_cast<uint32>(ptr[2]) << 16) | - (static_cast<uint32>(ptr[3]) << 24); - uint32 part1 = (static_cast<uint32>(ptr[4]) ) | - (static_cast<uint32>(ptr[5]) << 8) | - (static_cast<uint32>(ptr[6]) << 16) | - (static_cast<uint32>(ptr[7]) << 24); - *value = static_cast<uint64>(part0) | - (static_cast<uint64>(part1) << 32); +bool CodedInputStream::ReadVarint32Slow(uint32* value) { + uint64 result; + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte varints. 
+ if (!ReadVarint64Fallback(&result)) return false; + *value = (uint32)result; return true; } bool CodedInputStream::ReadVarint32Fallback(uint32* value) { - if (buffer_size_ >= kMaxVarintBytes || + if (BufferSize() >= kMaxVarintBytes || // Optimization: If the varint ends at exactly the end of the buffer, // we can detect that and still use the fast path. - (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - const uint8* ptr = buffer_; - uint32 b; - uint32 result; - - b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; - - // If the input is larger than 32 bits, we still need to read it all - // and discard the high-order bits. - for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { - b = *(ptr++); if (!(b & 0x80)) goto done; - } - - // We have overrun the maximum size of a varint (10 bytes). Assume - // the data is corrupt. - return false; - - done: - Advance(ptr - buffer_); - *value = result; + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + const uint8* end = ReadVarint32FromArray(buffer_, value); + if (end == NULL) return false; + buffer_ = end; return true; - } else { - // Optimization: If we're at a limit, detect that quickly. (This is - // common when reading tags.) - while (buffer_size_ == 0) { - // Detect cases where we definitely hit a byte limit without calling - // Refresh(). - if (// If we hit a limit, buffer_size_after_limit_ will be non-zero. 
- buffer_size_after_limit_ > 0 && - // Make sure that the limit we hit is not total_bytes_limit_, since - // in that case we still need to call Refresh() so that it prints an - // error. - total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { - // We hit a byte limit. - legitimate_message_end_ = true; - return false; - } + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarint32Slow(value); + } +} - // Call refresh. - if (!Refresh()) { - // Refresh failed. Make sure that it failed due to EOF, not because - // we hit total_bytes_limit_, which, unlike normal limits, is not a - // valid place to end a message. - int current_position = total_bytes_read_ - buffer_size_after_limit_; - if (current_position >= total_bytes_limit_) { - // Hit total_bytes_limit_. But if we also hit the normal limit, - // we're still OK. - legitimate_message_end_ = current_limit_ == total_bytes_limit_; - } else { - legitimate_message_end_ = true; - } - return false; +uint32 CodedInputStream::ReadTagSlow() { + if (buffer_ == buffer_end_) { + // Call refresh. + if (!Refresh()) { + // Refresh failed. Make sure that it failed due to EOF, not because + // we hit total_bytes_limit_, which, unlike normal limits, is not a + // valid place to end a message. + int current_position = total_bytes_read_ - buffer_size_after_limit_; + if (current_position >= total_bytes_limit_) { + // Hit total_bytes_limit_. But if we also hit the normal limit, + // we're still OK. + legitimate_message_end_ = current_limit_ == total_bytes_limit_; + } else { + legitimate_message_end_ = true; } + return 0; } + } - // Slow path: Just do a 64-bit read. - uint64 result; - if (!ReadVarint64(&result)) return false; - *value = (uint32)result; - return true; + // For the slow path, just do a 64-bit read. 
Try to optimize for one-byte tags + // again, since we have now refreshed the buffer. + uint64 result; + if (!ReadVarint64(&result)) return 0; + return static_cast<uint32>(result); +} + +uint32 CodedInputStream::ReadTagFallback() { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + uint32 tag; + const uint8* end = ReadVarint32FromArray(buffer_, &tag); + if (end == NULL) { + return 0; + } + buffer_ = end; + return tag; + } else { + // We are commonly at a limit when attempting to read tags. Try to quickly + // detect this case without making another function call. + if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 && + // Make sure that the limit we hit is not total_bytes_limit_, since + // in that case we still need to call Refresh() so that it prints an + // error. + total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { + // We hit a byte limit. + legitimate_message_end_ = true; + return 0; + } + return ReadTagSlow(); } } -bool CodedInputStream::ReadVarint64(uint64* value) { - if (buffer_size_ >= kMaxVarintBytes || +bool CodedInputStream::ReadVarint64Slow(uint64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + uint64 result = 0; + int count = 0; + uint32 b; + + do { + if (count == kMaxVarintBytes) return false; + while (buffer_ == buffer_end_) { + if (!Refresh()) return false; + } + b = *buffer_; + result |= static_cast<uint64>(b & 0x7F) << (7 * count); + Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; +} + +bool CodedInputStream::ReadVarint64Fallback(uint64* value) { + if (BufferSize() >= kMaxVarintBytes || // Optimization: If the varint ends at exactly the end of the buffer, // we can detect that and still use the fast path. 
- (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { // Fast path: We have enough bytes left in the buffer to guarantee that // this read won't cross the end, so we can skip the checks. @@ -442,7 +443,7 @@ bool CodedInputStream::ReadVarint64(uint64* value) { // We have overrun the maximum size of a varint (10 bytes). The data // must be corrupt. - return false; + return NULL; done: Advance(ptr - buffer_); @@ -450,33 +451,13 @@ bool CodedInputStream::ReadVarint64(uint64* value) { (static_cast<uint64>(part1) << 28) | (static_cast<uint64>(part2) << 56); return true; - } else { - // Slow path: This read might cross the end of the buffer, so we - // need to check and refresh the buffer if and when it does. - - uint64 result = 0; - int count = 0; - uint32 b; - - do { - if (count == kMaxVarintBytes) return false; - while (buffer_size_ == 0) { - if (!Refresh()) return false; - } - b = *buffer_; - result |= static_cast<uint64>(b & 0x7F) << (7 * count); - Advance(1); - ++count; - } while(b & 0x80); - - *value = result; - return true; + return ReadVarint64Slow(value); } } bool CodedInputStream::Refresh() { - GOOGLE_DCHECK_EQ(buffer_size_, 0); + GOOGLE_DCHECK_EQ(0, BufferSize()); if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 || total_bytes_read_ == current_limit_) { @@ -507,25 +488,27 @@ bool CodedInputStream::Refresh() { } const void* void_buffer; - if (input_->Next(&void_buffer, &buffer_size_)) { + int buffer_size; + if (input_->Next(&void_buffer, &buffer_size)) { buffer_ = reinterpret_cast<const uint8*>(void_buffer); - GOOGLE_CHECK_GE(buffer_size_, 0); + buffer_end_ = buffer_ + buffer_size; + GOOGLE_CHECK_GE(buffer_size, 0); - if (total_bytes_read_ <= INT_MAX - buffer_size_) { - total_bytes_read_ += buffer_size_; + if (total_bytes_read_ <= INT_MAX - buffer_size) { + total_bytes_read_ += buffer_size; } else { - // Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX. + // Overflow. 
Reset buffer_end_ to not include the bytes beyond INT_MAX. // We can't get that far anyway, because total_bytes_limit_ is guaranteed // to be less than it. We need to keep track of the number of bytes // we discarded, though, so that we can call input_->BackUp() to back // up over them on destruction. // The following line is equivalent to: - // overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX; + // overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX; // except that it avoids overflows. Signed integer overflow has // undefined results according to the C standard. - overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_); - buffer_size_ -= overflow_bytes_; + overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size); + buffer_end_ -= overflow_bytes_; total_bytes_read_ = INT_MAX; } @@ -533,7 +516,7 @@ bool CodedInputStream::Refresh() { return true; } else { buffer_ = NULL; - buffer_size_ = 0; + buffer_end_ = NULL; return false; } } diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h index fa023f35..dcbb0d45 100644 --- a/src/google/protobuf/io/coded_stream.h +++ b/src/google/protobuf/io/coded_stream.h @@ -114,10 +114,15 @@ #include <sys/param.h> #endif // !_MSC_VER #include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro namespace google { namespace protobuf { + +class DescriptorPool; +class MessageFactory; + namespace io { // Defined in this file. @@ -166,6 +171,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // types of data not covered by the CodedInputStream interface. bool GetDirectBufferPointer(const void** data, int* size); + // Like GetDirectBufferPointer, but this method is inlined, and does not + // attempt to Refresh() if the buffer is currently empty. + inline void GetDirectBufferPointerInline(const void** data, + int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // Read raw bytes, copying them into the given buffer. 
bool ReadRaw(void* buffer, int size); @@ -177,6 +187,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // could claim that a string is going to be MAX_INT bytes long in order to // crash the server because it can't allocate this much space at once. bool ReadString(string* buffer, int size); + // Like the above, with inlined optimizations. This should only be used + // by the protobuf implementation. + inline bool InternalReadStringInline(string* buffer, + int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; // Read a 32-bit little-endian integer. @@ -184,6 +198,15 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Read a 64-bit little-endian integer. bool ReadLittleEndian64(uint64* value); + // These methods read from an externally provided buffer. The caller is + // responsible for ensuring that the buffer has sufficient space. + // Read a 32-bit little-endian integer. + static const uint8* ReadLittleEndian32FromArray(const uint8* buffer, + uint32* value); + // Read a 64-bit little-endian integer. + static const uint8* ReadLittleEndian64FromArray(const uint8* buffer, + uint64* value); + // Read an unsigned integer with Varint encoding, truncating to 32 bits. // Reading a 32-bit value is equivalent to reading a 64-bit one and casting // it to uint32, but may be more efficient. @@ -208,6 +231,17 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // when given a constant parameter, but GCC doesn't want to inline by default. bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // Like above, except this reads from the specified buffer. The caller is + // responsible for ensuring that the buffer is large enough to read a varint + // of the expected size. For best performance, use a compile-time constant as + // the expected tag parameter. + // + // Returns a pointer beyond the expected tag if it was found, or NULL if it + // was not. 
+ static const uint8* ExpectTagFromArray( + const uint8* buffer, + uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // Usually returns true if no more bytes can be read. Always returns false // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent // call to LastTagWas() will act as if ReadTag() had been called and returned @@ -318,12 +352,90 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Decrements the recursion depth. void DecrementRecursionDepth(); + // Extension Registry ---------------------------------------------- + // ADVANCED USAGE: 99.9% of people can ignore this section. + // + // By default, when parsing extensions, the parser looks for extension + // definitions in the pool which owns the outer message's Descriptor. + // However, you may call SetExtensionRegistry() to provide an alternative + // pool instead. This makes it possible, for example, to parse a message + // using a generated class, but represent some extensions using + // DynamicMessage. + + // Set the pool used to look up extensions. Most users do not need to call + // this as the correct pool will be chosen automatically. + // + // WARNING: It is very easy to misuse this. Carefully read the requirements + // below. Do not use this unless you are sure you need it. Almost no one + // does. + // + // Let's say you are parsing a message into message object m, and you want + // to take advantage of SetExtensionRegistry(). You must follow these + // requirements: + // + // The given DescriptorPool must contain m->GetDescriptor(). It is not + // sufficient for it to simply contain a descriptor that has the same name + // and content -- it must be the *exact object*. In other words: + // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) == + // m->GetDescriptor()); + // There are two ways to satisfy this requirement: + // 1) Use m->GetDescriptor()->pool() as the pool. 
This is generally useless + // because this is the pool that would be used anyway if you didn't call + // SetExtensionRegistry() at all. + // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an + // "underlay". Read the documentation for DescriptorPool for more + // information about underlays. + // + // You must also provide a MessageFactory. This factory will be used to + // construct Message objects representing extensions. The factory's + // GetPrototype() MUST return non-NULL for any Descriptor which can be found + // through the provided pool. + // + // If the provided factory might return instances of protocol-compiler- + // generated (i.e. compiled-in) types, or if the outer message object m is + // a generated type, then the given factory MUST have this property: If + // GetPrototype() is given a Descriptor which resides in + // DescriptorPool::generated_pool(), the factory MUST return the same + // prototype which MessageFactory::generated_factory() would return. That + // is, given a descriptor for a generated type, the factory must return an + // instance of the generated class (NOT DynamicMessage). However, when + // given a descriptor for a type that is NOT in generated_pool, the factory + // is free to return any implementation. + // + // The reason for this requirement is that generated sub-objects may be + // accessed via the standard (non-reflection) extension accessor methods, + // and these methods will down-cast the object to the generated class type. + // If the object is not actually of that type, the results would be undefined. + // On the other hand, if an extension is not compiled in, then there is no + // way the code could end up accessing it via the standard accessors -- the + // only way to access the extension is via reflection. When using reflection, + // DynamicMessage and generated messages are indistinguishable, so it's fine + // if these objects are represented using DynamicMessage. 
+ // + // Using DynamicMessageFactory on which you have called + // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the + // above requirement. + // + // If either pool or factory is NULL, both must be NULL. + // + // Note that this feature is ignored when parsing "lite" messages as they do + // not have descriptors. + void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory); + + // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool + // has been provided. + const DescriptorPool* GetExtensionPool(); + + // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no + // factory has been provided. + MessageFactory* GetExtensionFactory(); + private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); ZeroCopyInputStream* input_; const uint8* buffer_; - int buffer_size_; // size of current buffer + const uint8* buffer_end_; // pointer to the end of the buffer. int total_bytes_read_; // total bytes read from input_, including // the current buffer @@ -334,7 +446,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // LastTagWas() stuff. uint32 last_tag_; // result of last ReadTag(). - // This is set true by ReadVarint32Fallback() if it is called when exactly + // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly // at EOF, or by ExpectAtEnd() when it returns true. This happens when we // reach the end of a message and attempt to read another tag. bool legitimate_message_end_; @@ -365,6 +477,12 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Recursion depth limit, set by SetRecursionLimit(). int recursion_limit_; + // See SetExtensionRegistry(). + const DescriptorPool* extension_pool_; + MessageFactory* extension_factory_; + + // Private member functions. + // Advance the buffer by a given number of bytes. 
void Advance(int amount); @@ -379,10 +497,36 @@ class LIBPROTOBUF_EXPORT CodedInputStream { void PrintTotalBytesLimitError(); // Called when the buffer runs out to request more data. Implies an - // Advance(buffer_size_). + // Advance(BufferSize()). bool Refresh(); + // When parsing varints, we optimize for the common case of small values, and + // then optimize for the case when the varint fits within the current buffer + // piece. The Fallback method is used when we can't use the one-byte + // optimization. The Slow method is yet another fallback when the buffer is + // not large enough. Making the slow path out-of-line speeds up the common + // case by 10-15%. The slow path is fairly uncommon: it only triggers when a + // message crosses multiple buffers. bool ReadVarint32Fallback(uint32* value); + bool ReadVarint64Fallback(uint64* value); + bool ReadVarint32Slow(uint32* value); + bool ReadVarint64Slow(uint64* value); + bool ReadLittleEndian32Fallback(uint32* value); + bool ReadLittleEndian64Fallback(uint64* value); + // Fallback/slow methods for reading tags. These do not update last_tag_, + // but will set legitimate_message_end_ if we are at the end of the input + // stream. + uint32 ReadTagFallback(); + uint32 ReadTagSlow(); + bool ReadStringFallback(string* buffer, int size); + + // Return the size of the buffer. + int BufferSize() const; + + static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB + + static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB + static const int kDefaultRecursionLimit = 64; }; // Class which encodes and writes binary data which is composed of varint- @@ -568,7 +712,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // methods optimize for that case. 
inline bool CodedInputStream::ReadVarint32(uint32* value) { - if (buffer_size_ != 0 && *buffer_ < 0x80) { + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { *value = *buffer_; Advance(1); return true; @@ -577,20 +721,93 @@ inline bool CodedInputStream::ReadVarint32(uint32* value) { } } +inline bool CodedInputStream::ReadVarint64(uint64* value) { + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { + *value = *buffer_; + Advance(1); + return true; + } else { + return ReadVarint64Fallback(value); + } +} + +// static +inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( + const uint8* buffer, + uint32* value) { +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + *value = (static_cast<uint32>(buffer[0]) ) | + (static_cast<uint32>(buffer[1]) << 8) | + (static_cast<uint32>(buffer[2]) << 16) | + (static_cast<uint32>(buffer[3]) << 24); + return buffer + sizeof(*value); +#endif +} +// static +inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( + const uint8* buffer, + uint64* value) { +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + uint32 part0 = (static_cast<uint32>(buffer[0]) ) | + (static_cast<uint32>(buffer[1]) << 8) | + (static_cast<uint32>(buffer[2]) << 16) | + (static_cast<uint32>(buffer[3]) << 24); + uint32 part1 = (static_cast<uint32>(buffer[4]) ) | + (static_cast<uint32>(buffer[5]) << 8) | + (static_cast<uint32>(buffer[6]) << 16) | + (static_cast<uint32>(buffer[7]) << 24); + *value = static_cast<uint64>(part0) | + (static_cast<uint64>(part1) << 32); + return buffer + sizeof(*value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { +#if 
!defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { + memcpy(value, buffer_, sizeof(*value)); + Advance(sizeof(*value)); + return true; + } else { + return ReadLittleEndian32Fallback(value); + } +#else + return ReadLittleEndian32Fallback(value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian64(uint64* value) { +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { + memcpy(value, buffer_, sizeof(*value)); + Advance(sizeof(*value)); + return true; + } else { + return ReadLittleEndian64Fallback(value); + } +#else + return ReadLittleEndian64Fallback(value); +#endif +} + inline uint32 CodedInputStream::ReadTag() { - if (buffer_size_ != 0 && buffer_[0] < 0x80) { + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) { last_tag_ = buffer_[0]; Advance(1); return last_tag_; - } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) { - last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7); - Advance(2); - return last_tag_; - } else if (ReadVarint32Fallback(&last_tag_)) { - return last_tag_; } else { - last_tag_ = 0; - return 0; + last_tag_ = ReadTagFallback(); + return last_tag_; } } @@ -604,14 +821,14 @@ inline bool CodedInputStream::ConsumedEntireMessage() { inline bool CodedInputStream::ExpectTag(uint32 expected) { if (expected < (1 << 7)) { - if (buffer_size_ != 0 && buffer_[0] == expected) { + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) { Advance(1); return true; } else { return false; } } else if (expected < (1 << 14)) { - if (buffer_size_ >= 2 && + if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) && buffer_[0] == static_cast<uint8>(expected | 0x80) && buffer_[1] == static_cast<uint8>(expected >> 7)) { Advance(2); @@ -625,11 +842,32 @@ inline bool 
CodedInputStream::ExpectTag(uint32 expected) { } } +inline const uint8* CodedInputStream::ExpectTagFromArray( + const uint8* buffer, uint32 expected) { + if (expected < (1 << 7)) { + if (buffer[0] == expected) { + return buffer + 1; + } + } else if (expected < (1 << 14)) { + if (buffer[0] == static_cast<uint8>(expected | 0x80) && + buffer[1] == static_cast<uint8>(expected >> 7)) { + return buffer + 2; + } + } + return NULL; +} + +inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, + int* size) { + *data = buffer_; + *size = buffer_end_ - buffer_; +} + inline bool CodedInputStream::ExpectAtEnd() { // If we are at a limit we know no more bytes can be read. Otherwise, it's // hard to say without calling Refresh(), and we'd rather not do that. - if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) { + if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) { last_tag_ = 0; // Pretend we called ReadTag()... legitimate_message_end_ = true; // ... and it hit EOF. 
return true; @@ -677,11 +915,11 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray( inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, uint8* target) { -#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \ +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(target, &value, sizeof(value)); #else - target[0] = static_cast<uint8>(value ); + target[0] = static_cast<uint8>(value); target[1] = static_cast<uint8>(value >> 8); target[2] = static_cast<uint8>(value >> 16); target[3] = static_cast<uint8>(value >> 24); @@ -691,18 +929,18 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, uint8* target) { -#if !defined(PROTOBUF_TEST_NOT_LITTLE_ENDIAN) && \ +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(target, &value, sizeof(value)); #else uint32 part0 = static_cast<uint32>(value); uint32 part1 = static_cast<uint32>(value >> 32); - target[0] = static_cast<uint8>(part0 ); + target[0] = static_cast<uint8>(part0); target[1] = static_cast<uint8>(part0 >> 8); target[2] = static_cast<uint8>(part0 >> 16); target[3] = static_cast<uint8>(part0 >> 24); - target[4] = static_cast<uint8>(part1 ); + target[4] = static_cast<uint8>(part1); target[5] = static_cast<uint8>(part1 >> 8); target[6] = static_cast<uint8>(part1 >> 16); target[7] = static_cast<uint8>(part1 >> 24); @@ -759,7 +997,6 @@ inline int CodedOutputStream::ByteCount() const { inline void CodedInputStream::Advance(int amount) { buffer_ += amount; - buffer_size_ -= amount; } inline void CodedOutputStream::Advance(int amount) { @@ -780,6 +1017,72 @@ inline void CodedInputStream::DecrementRecursionDepth() { if (recursion_depth_ > 0) --recursion_depth_; } +inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool, + 
MessageFactory* factory) { + extension_pool_ = pool; + extension_factory_ = factory; +} + +inline const DescriptorPool* CodedInputStream::GetExtensionPool() { + return extension_pool_; +} + +inline MessageFactory* CodedInputStream::GetExtensionFactory() { + return extension_factory_; +} + +inline int CodedInputStream::BufferSize() const { + return buffer_end_ - buffer_; +} + +inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) + : input_(input), + buffer_(NULL), + buffer_end_(NULL), + total_bytes_read_(0), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(INT_MAX), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), + recursion_depth_(0), + recursion_limit_(kDefaultRecursionLimit), + extension_pool_(NULL), + extension_factory_(NULL) { + // Eagerly Refresh() so buffer space is immediately available. + Refresh(); +} + +inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) + : input_(NULL), + buffer_(buffer), + buffer_end_(buffer + size), + total_bytes_read_(size), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(size), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), + recursion_depth_(0), + recursion_limit_(kDefaultRecursionLimit), + extension_pool_(NULL), + extension_factory_(NULL) { + // Note that setting current_limit_ == size is important to prevent some + // code paths from trying to access input_ and segfaulting. 
+} + +inline CodedInputStream::~CodedInputStream() { + if (input_ != NULL) { + BackUpInputToCurrentPosition(); + } +} + } // namespace io } // namespace protobuf diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc index e165fb93..7d298332 100644 --- a/src/google/protobuf/io/coded_stream_unittest.cc +++ b/src/google/protobuf/io/coded_stream_unittest.cc @@ -242,6 +242,24 @@ TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) { } } +TEST_1D(CodedStreamTest, ExpectTagFromArray, kVarintCases) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + + const uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + + // If the expectation succeeds, it should return a pointer past the tag. + if (kVarintCases_case.size <= 2) { + EXPECT_TRUE(NULL == + CodedInputStream::ExpectTagFromArray(buffer_, + expected_value + 1)); + EXPECT_TRUE(buffer_ + kVarintCases_case.size == + CodedInputStream::ExpectTagFromArray(buffer_, expected_value)); + } else { + EXPECT_TRUE(NULL == + CodedInputStream::ExpectTagFromArray(buffer_, expected_value)); + } +} + TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) { memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); @@ -529,10 +547,32 @@ TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) { EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64))); } +// Tests using the static methods to read fixed-size values from raw arrays. 
+ +TEST_1D(CodedStreamTest, ReadLittleEndian32FromArray, kFixed32Cases) { + memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes)); + + uint32 value; + const uint8* end = CodedInputStream::ReadLittleEndian32FromArray( + buffer_, &value); + EXPECT_EQ(kFixed32Cases_case.value, value); + EXPECT_TRUE(end == buffer_ + sizeof(value)); +} + +TEST_1D(CodedStreamTest, ReadLittleEndian64FromArray, kFixed64Cases) { + memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes)); + + uint64 value; + const uint8* end = CodedInputStream::ReadLittleEndian64FromArray( + buffer_, &value); + EXPECT_EQ(kFixed64Cases_case.value, value); + EXPECT_TRUE(end == buffer_ + sizeof(value)); +} + // ------------------------------------------------------------------- // Raw reads and writes -const char kRawBytes[] = "Some bytes which will be writted and read raw."; +const char kRawBytes[] = "Some bytes which will be written and read raw."; TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) { memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); @@ -593,6 +633,22 @@ TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) { } } +TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) { + // Same test as above, except directly use a buffer. This used to cause + // crashes while the above did not. 
+ uint8 buffer[8]; + CodedInputStream coded_input(buffer, 8); + string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); +} + +TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) { + scoped_array<uint8> buffer(new uint8[8]); + CodedInputStream coded_input(buffer.get(), 8); + string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); +} + // ------------------------------------------------------------------- // Skip @@ -652,6 +708,36 @@ TEST_F(CodedStreamTest, GetDirectBufferPointerInput) { EXPECT_EQ(8, size); } +TEST_F(CodedStreamTest, GetDirectBufferPointerInlineInput) { + ArrayInputStream input(buffer_, sizeof(buffer_), 8); + CodedInputStream coded_input(&input); + + const void* ptr; + int size; + + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Peeking again should return the same pointer. + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Skip forward in the same buffer then peek again. + EXPECT_TRUE(coded_input.Skip(3)); + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_ + 3, ptr); + EXPECT_EQ(5, size); + + // Skip to end of buffer and peek -- should provide an empty buffer (the + // inline variant returns void). It does not try to Refresh(). 
+ EXPECT_TRUE(coded_input.Skip(5)); + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_ + 8, ptr); + EXPECT_EQ(0, size); +} + TEST_F(CodedStreamTest, GetDirectBufferPointerOutput) { ArrayOutputStream output(buffer_, sizeof(buffer_), 8); CodedOutputStream coded_output(&output); diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc index 84d277f4..e1a35ea2 100644 --- a/src/google/protobuf/io/gzip_stream.cc +++ b/src/google/protobuf/io/gzip_stream.cc @@ -315,6 +315,6 @@ bool GzipOutputStream::Close() { } // namespace io } // namespace protobuf -} // namespace google #endif // HAVE_ZLIB +} // namespace google diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc index 937d777e..c7d3074d 100644 --- a/src/google/protobuf/io/printer.cc +++ b/src/google/protobuf/io/printer.cc @@ -65,10 +65,10 @@ void Printer::Print(const map<string, string>& variables, const char* text) { if (text[i] == '\n') { // Saw newline. If there is more text, we may need to insert an indent // here. So, write what we have so far, including the '\n'. - Write(text + pos, i - pos + 1); + WriteRaw(text + pos, i - pos + 1); pos = i + 1; - // Setting this true will cause the next Write() to insert an indent + // Setting this true will cause the next WriteRaw() to insert an indent // first. at_start_of_line_ = true; @@ -76,7 +76,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) { // Saw the start of a variable name. // Write what we have so far. - Write(text + pos, i - pos); + WriteRaw(text + pos, i - pos); pos = i + 1; // Find closing delimiter. @@ -90,14 +90,14 @@ void Printer::Print(const map<string, string>& variables, const char* text) { string varname(text + pos, endpos - pos); if (varname.empty()) { // Two delimiters in a row reduce to a literal delimiter character. 
- Write(&variable_delimiter_, 1); + WriteRaw(&variable_delimiter_, 1); } else { // Replace with the variable's value. map<string, string>::const_iterator iter = variables.find(varname); if (iter == variables.end()) { GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname; } else { - Write(iter->second.data(), iter->second.size()); + WriteRaw(iter->second.data(), iter->second.size()); } } @@ -108,7 +108,7 @@ void Printer::Print(const map<string, string>& variables, const char* text) { } // Write the rest. - Write(text + pos, size - pos); + WriteRaw(text + pos, size - pos); } void Printer::Print(const char* text) { @@ -145,14 +145,23 @@ void Printer::Outdent() { indent_.resize(indent_.size() - 2); } -void Printer::Write(const char* data, int size) { +void Printer::PrintRaw(const string& data) { + WriteRaw(data.data(), data.size()); +} + +void Printer::PrintRaw(const char* data) { + if (failed_) return; + WriteRaw(data, strlen(data)); +} + +void Printer::WriteRaw(const char* data, int size) { if (failed_) return; if (size == 0) return; if (at_start_of_line_) { // Insert an indent. at_start_of_line_ = false; - Write(indent_.data(), indent_.size()); + WriteRaw(indent_.data(), indent_.size()); if (failed_) return; } diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h index b7c4cf39..de085389 100644 --- a/src/google/protobuf/io/printer.h +++ b/src/google/protobuf/io/printer.h @@ -59,8 +59,8 @@ class ZeroCopyOutputStream; // zero_copy_stream.h // The above writes "My name is Bob." to the output stream. // // Printer aggressively enforces correct usage, crashing (with assert failures) -// in the case of undefined variables. This helps greatly in debugging code -// which uses it. This class is not intended to be used by production servers. +// in the case of undefined variables in debug builds. This helps greatly in +// debugging code which uses it. 
class LIBPROTOBUF_EXPORT Printer { public: // Create a printer that writes text to the given output stream. Use the @@ -94,15 +94,24 @@ class LIBPROTOBUF_EXPORT Printer { // level is zero. void Outdent(); + // Write a string to the output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const string& data); + + // Write a zero-delimited string to output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const char* data); + + // Write some bytes to the output buffer. + // This method does not look for newlines to add indentation. + void WriteRaw(const char* data, int size); + // True if any write to the underlying stream failed. (We don't just // crash in this case because this is an I/O failure, not a programming // error.) bool failed() const { return failed_; } private: - // Write some text to the output buffer. - void Write(const char* data, int size); - const char variable_delimiter_; ZeroCopyOutputStream* const output_; diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc index 69c7ee34..580a53da 100644 --- a/src/google/protobuf/io/printer_unittest.cc +++ b/src/google/protobuf/io/printer_unittest.cc @@ -76,10 +76,38 @@ TEST(Printer, BasicPrinting) { buffer[output.ByteCount()] = '\0'; - EXPECT_STREQ(buffer, - "Hello World! This is the same line.\n" - "But this is a new one.\n" - "And this is another one."); + EXPECT_STREQ("Hello World! 
This is the same line.\n" + "But this is a new one.\n" + "And this is another one.", + buffer); + } +} + +TEST(Printer, WriteRaw) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + string string_obj = "From an object\n"; + Printer printer(&output, '$'); + printer.WriteRaw("Hello World!", 12); + printer.PrintRaw(" This is the same line.\n"); + printer.PrintRaw("But this is a new one.\nAnd this is another one."); + printer.WriteRaw("\n", 1); + printer.PrintRaw(string_obj); + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ("Hello World! This is the same line.\n" + "But this is a new one.\n" + "And this is another one." + "\n" + "From an object\n", + buffer); } } @@ -98,6 +126,7 @@ TEST(Printer, VariableSubstitution) { vars["abcdefg"] = "1234"; printer.Print(vars, "Hello $foo$!\nbar = $bar$\n"); + printer.PrintRaw("RawBit\n"); printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$"); vars["foo"] = "blah"; @@ -108,12 +137,13 @@ TEST(Printer, VariableSubstitution) { buffer[output.ByteCount()] = '\0'; - EXPECT_STREQ(buffer, - "Hello World!\n" - "bar = $foo$\n" - "1234\n" - "A literal dollar sign: $\n" - "Now foo = blah."); + EXPECT_STREQ("Hello World!\n" + "bar = $foo$\n" + "RawBit\n" + "1234\n" + "A literal dollar sign: $\n" + "Now foo = blah.", + buffer); } } @@ -125,15 +155,17 @@ TEST(Printer, InlineVariableSubstitution) { { Printer printer(&output, '$'); printer.Print("Hello $foo$!\n", "foo", "World"); + printer.PrintRaw("RawBit\n"); printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two"); EXPECT_FALSE(printer.failed()); } buffer[output.ByteCount()] = '\0'; - EXPECT_STREQ(buffer, - "Hello World!\n" - "one two\n"); + EXPECT_STREQ("Hello World!\n" + "RawBit\n" + "one two\n", + buffer); } TEST(Printer, Indenting) { @@ -156,6 +188,8 @@ TEST(Printer, Indenting) { printer.Indent(); printer.Print(" And this is still 
the same line.\n" "But this is indented.\n"); + printer.PrintRaw("RawBit has indent at start\n"); + printer.PrintRaw("but not after a raw newline\n"); printer.Print(vars, "Note that a newline in a variable will break " "indenting, as we see$newline$here.\n"); printer.Indent(); @@ -169,16 +203,19 @@ TEST(Printer, Indenting) { buffer[output.ByteCount()] = '\0'; - EXPECT_STREQ(buffer, + EXPECT_STREQ( "This is not indented.\n" " This is indented\n" " And so is this\n" "But this is not. And this is still the same line.\n" " But this is indented.\n" - " Note that a newline in a variable will break indenting, as we see\n" + " RawBit has indent at start\n" + "but not after a raw newline\n" + "Note that a newline in a variable will break indenting, as we see\n" "here.\n" " And this is double-indented\n" - "Back to normal."); + "Back to normal.", + buffer); } } diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index 0bda451b..75cbfed5 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -119,7 +119,7 @@ namespace { CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v'); -CHARACTER_CLASS(Unprintable, c < ' ' && c != '\0'); +CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0'); CHARACTER_CLASS(Digit, '0' <= c && c <= '9'); CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7'); diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h index 98386e0b..d115161f 100644 --- a/src/google/protobuf/io/tokenizer.h +++ b/src/google/protobuf/io/tokenizer.h @@ -63,6 +63,11 @@ class LIBPROTOBUF_EXPORT ErrorCollector { // 1 to each before printing them. virtual void AddError(int line, int column, const string& message) = 0; + // Indicates that there was a warning in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. 
+ virtual void AddWarning(int line, int column, const string& message) { } + private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); }; diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index eac1455d..3598e188 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -397,6 +397,12 @@ MultiTokenCase kMultiTokenCases[] = { { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 }, { Tokenizer::TYPE_END , "" , 1, 3 }, }}, + + // Bytes with the high-order bit set should not be seen as control characters. + { "\300", { + { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 }, + { Tokenizer::TYPE_END , "" , 0, 1 }, + }}, }; TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h index 64e96cd4..9fedb005 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.h +++ b/src/google/protobuf/io/zero_copy_stream_impl.h @@ -133,10 +133,11 @@ class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream { // A ZeroCopyOutputStream which writes to a file descriptor. // -// FileInputStream is preferred over using an ofstream with OstreamOutputStream. -// The latter will introduce an extra layer of buffering, harming performance. -// Also, it's conceivable that FileInputStream could someday be enhanced -// to use zero-copy file descriptors on OSs which support them. +// FileOutputStream is preferred over using an ofstream with +// OstreamOutputStream. The latter will introduce an extra layer of buffering, +// harming performance. Also, it's conceivable that FileOutputStream could +// someday be enhanced to use zero-copy file descriptors on OSs which +// support them. class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream { public: // Creates a stream that writes to the given Unix file descriptor. 
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc index f919b7ac..8229ee6d 100644 --- a/src/google/protobuf/io/zero_copy_stream_unittest.cc +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -403,7 +403,8 @@ TEST_F(IoTest, CompressionOptions) { string golden; File::ReadFileToStringOrDie( - TestSourceDir() + "/google/protobuf/testdata/golden_message", &golden); + TestSourceDir() + "/google/protobuf/testdata/golden_message", + &golden); GzipOutputStream::Options options; string gzip_compressed = Compress(golden, options); |