diff options
author | jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2014-07-18 00:47:59 +0000 |
---|---|---|
committer | jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2014-07-18 00:47:59 +0000 |
commit | 4de8f55113007fdc8e34107950e605fc0209d465 (patch) | |
tree | 92b7da8757a7740d9e1f2d3ead233542947d8c8c /src/google/protobuf/io | |
parent | c5553a3d18f80132b9079c5504bc0aa1f7f950a0 (diff) |
down integrate to svn
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r-- | src/google/protobuf/io/coded_stream.cc | 101 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream.h | 94 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream_inl.h | 3 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream_unittest.cc | 193 | ||||
-rw-r--r-- | src/google/protobuf/io/gzip_stream.h | 2 | ||||
-rw-r--r-- | src/google/protobuf/io/printer_unittest.cc | 2 | ||||
-rw-r--r-- | src/google/protobuf/io/strtod.cc | 113 | ||||
-rw-r--r-- | src/google/protobuf/io/strtod.h | 50 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.cc | 44 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.h | 17 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer_unittest.cc | 24 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream.cc | 9 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream.h | 10 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl.cc | 8 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl.h | 1 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl_lite.cc | 20 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl_lite.h | 14 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_unittest.cc | 27 |
18 files changed, 674 insertions, 58 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc index 36add8c3..f6a84533 100644 --- a/src/google/protobuf/io/coded_stream.cc +++ b/src/google/protobuf/io/coded_stream.cc @@ -83,6 +83,10 @@ CodedInputStream::~CodedInputStream() { int CodedInputStream::default_recursion_limit_ = 100; +void CodedOutputStream::EnableAliasing(bool enabled) { + aliasing_enabled_ = enabled && output_->AllowsAliasing(); +} + void CodedInputStream::BackUpInputToCurrentPosition() { int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; if (backup_bytes > 0) { @@ -167,6 +171,11 @@ void CodedInputStream::SetTotalBytesLimit( RecomputeBufferLimits(); } +int CodedInputStream::BytesUntilTotalBytesLimit() const { + if (total_bytes_limit_ == INT_MAX) return -1; + return total_bytes_limit_ - CurrentPosition(); +} + void CodedInputStream::PrintTotalBytesLimitError() { GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too " "big (more than " << total_bytes_limit_ @@ -247,6 +256,14 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) { buffer->clear(); } + int closest_limit = min(current_limit_, total_bytes_limit_); + if (closest_limit != INT_MAX) { + int bytes_to_limit = closest_limit - CurrentPosition(); + if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) { + buffer->reserve(size); + } + } + int current_buffer_size; while ((current_buffer_size = BufferSize()) < size) { // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). 
@@ -313,11 +330,16 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) { uint32 b; uint32 result; - b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; + b = *(ptr++); result = b ; if (!(b & 0x80)) goto done; + result -= 0x80; + b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done; + result -= 0x80 << 7; + b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done; + result -= 0x80 << 14; + b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done; + result -= 0x80 << 21; + b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done; + // "result -= 0x80 << 28" is irrevelant. // If the input is larger than 32 bits, we still need to read it all // and discard the high-order bits. @@ -347,8 +369,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) { bool CodedInputStream::ReadVarint32Fallback(uint32* value) { if (BufferSize() >= kMaxVarintBytes || - // Optimization: If the varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { const uint8* end = ReadVarint32FromArray(buffer_, value); if (end == NULL) return false; @@ -391,8 +413,8 @@ uint32 CodedInputStream::ReadTagSlow() { uint32 CodedInputStream::ReadTagFallback() { const int buf_size = BufferSize(); if (buf_size >= kMaxVarintBytes || - // Optimization: If the varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. 
+ // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. (buf_size > 0 && !(buffer_end_[-1] & 0x80))) { uint32 tag; const uint8* end = ReadVarint32FromArray(buffer_, &tag); @@ -444,8 +466,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) { bool CodedInputStream::ReadVarint64Fallback(uint64* value) { if (BufferSize() >= kMaxVarintBytes || - // Optimization: If the varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { // Fast path: We have enough bytes left in the buffer to guarantee that // this read won't cross the end, so we can skip the checks. @@ -457,16 +479,26 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) { // processors. uint32 part0 = 0, part1 = 0, part2 = 0; - b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; - b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; - b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; - b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; - b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; - b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; - b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done; + part0 -= 0x80; + b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 7; + b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 14; + b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 
21; + b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done; + part1 -= 0x80; + b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 7; + b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 14; + b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 21; + b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done; + part2 -= 0x80; + b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done; + // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0. // We have overrun the maximum size of a varint (10 bytes). The data // must be corrupt. @@ -555,7 +587,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output) buffer_(NULL), buffer_size_(0), total_bytes_(0), - had_error_(false) { + had_error_(false), + aliasing_enabled_(false) { // Eagerly Refresh() so buffer space is immediately available. Refresh(); // The Refresh() may have failed. If the client doesn't write any data, @@ -609,6 +642,23 @@ uint8* CodedOutputStream::WriteRawToArray( } +void CodedOutputStream::WriteAliasedRaw(const void* data, int size) { + if (size < buffer_size_ + ) { + WriteRaw(data, size); + } else { + if (buffer_size_ > 0) { + output_->BackUp(buffer_size_); + total_bytes_ -= buffer_size_; + buffer_ = NULL; + buffer_size_ = 0; + } + + total_bytes_ += size; + had_error_ |= !output_->WriteAliasedRaw(data, size); + } +} + void CodedOutputStream::WriteLittleEndian32(uint32 value) { uint8 bytes[sizeof(value)]; @@ -852,6 +902,13 @@ int CodedOutputStream::VarintSize64(uint64 value) { } } +uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str, + uint8* target) { + GOOGLE_DCHECK_LE(str.size(), kuint32max); + target = WriteVarint32ToArray(str.size(), target); + return WriteStringToArray(str, target); +} + } // namespace io } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h index 66cbee00..50a03a11 100644 --- 
a/src/google/protobuf/io/coded_stream.h +++ b/src/google/protobuf/io/coded_stream.h @@ -233,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Read a tag. This calls ReadVarint32() and returns the result, or returns // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates // the last tag value, which can be checked with LastTagWas(). - // Always inline because this is only called in once place per parse loop + // Always inline because this is only called in one place per parse loop // but it is called for every iteration of said loop, so it should be fast. // GCC doesn't want to inline this by default. uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // This usually a faster alternative to ReadTag() when cutoff is a manifest + // constant. It does particularly well for cutoff >= 127. The first part + // of the return value is the tag that was read, though it can also be 0 in + // the cases where ReadTag() would return 0. If the second part is true + // then the tag is known to be in [0, cutoff]. If not, the tag either is + // above cutoff or is 0. (There's intentional wiggle room when tag is 0, + // because that can arise in several ways, and for best performance we want + // to avoid an extra "is tag == 0?" check here.) + inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) + GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // Usually returns true if calling ReadVarint32() now would produce the given // value. Will always return false if ReadVarint32() would not return the // given value. If ExpectTag() returns true, it also advances past @@ -264,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // zero, and ConsumedEntireMessage() will return true. bool ExpectAtEnd(); - // If the last call to ReadTag() returned the given value, returns true. - // Otherwise, returns false; + // If the last call to ReadTag() or ReadTagWithCutoff() returned the + // given value, returns true. 
Otherwise, returns false; // // This is needed because parsers for some types of embedded messages // (with field type TYPE_GROUP) don't actually know that they've reached the @@ -333,7 +344,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // cause integer overflows is 512MB. The default limit is 64MB. Apps // should set shorter limits if possible. If warning_threshold is not -1, // a warning will be printed to stderr after warning_threshold bytes are - // read. For backwards compatibility all negative values get squached to -1, + // read. For backwards compatibility all negative values get squashed to -1, // as other negative values might have special internal meanings. // An error will always be printed to stderr if the limit is reached. // @@ -356,6 +367,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // something unusual. void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); + // The Total Bytes Limit minus the Current Position, or -1 if there + // is no Total Bytes Limit. + int BytesUntilTotalBytesLimit() const; + // Recursion Limit ------------------------------------------------- // To prevent corrupt or malicious messages from causing stack overflows, // we must keep track of the depth of recursion when parsing embedded @@ -466,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { int overflow_bytes_; // LastTagWas() stuff. - uint32 last_tag_; // result of last ReadTag(). + uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly // at EOF, or by ExpectAtEnd() when it returns true. This happens when we @@ -638,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Write raw bytes, copying them from the given buffer. void WriteRaw(const void* buffer, int size); + // Like WriteRaw() but will try to write aliased data if aliasing is + // turned on. 
+ void WriteRawMaybeAliased(const void* data, int size); // Like WriteRaw() but writing directly to the target array. // This is _not_ inlined, as the compiler often optimizes memcpy into inline // copy loops. Since this gets called by every field with string or bytes @@ -649,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { void WriteString(const string& str); // Like WriteString() but writing directly to the target array. static uint8* WriteStringToArray(const string& str, uint8* target); + // Write the varint-encoded size of str followed by str. + static uint8* WriteStringWithSizeToArray(const string& str, uint8* target); + // Instructs the CodedOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled); + // Write a 32-bit little-endian integer. void WriteLittleEndian32(uint32 value); // Like WriteLittleEndian32() but writing directly to the target array. @@ -725,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { int buffer_size_; int total_bytes_; // Sum of sizes of all buffers seen so far. bool had_error_; // Whether an error occurred during output. + bool aliasing_enabled_; // See EnableAliasing(). // Advance the buffer by a given number of bytes. void Advance(int amount); @@ -733,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Advance(buffer_size_). bool Refresh(); + // Like WriteRaw() but may avoid copying if the underlying + // ZeroCopyOutputStream supports it. 
+ void WriteAliasedRaw(const void* buffer, int size); + static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target); // Always-inlined versions of WriteVarint* functions so that code can be @@ -850,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() { } } +inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff( + uint32 cutoff) { + // In performance-sensitive code we can expect cutoff to be a compile-time + // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at + // compile time. + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). + // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields + // is large enough then is it better to check for the two-byte case first? + if (static_cast<int8>(buffer_[0]) > 0) { + const uint32 kMax1ByteVarint = 0x7f; + uint32 tag = last_tag_ = buffer_[0]; + Advance(1); + return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff); + } + // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, + // and tag is two bytes. The latter is tested by bitwise-and-not of the + // first byte and the second byte. + if (cutoff >= 0x80 && + GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && + GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { + const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f; + uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); + Advance(2); + // It might make sense to test for tag == 0 now, but it is so rare that + // that we don't bother. A varint-encoded 0 should be one byte unless + // the encoder lost its mind. The second part of the return value of + // this function is allowed to be either true or false if the tag is 0, + // so we don't have to check for tag == 0. We may need to check whether + // it exceeds cutoff. 
+ bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff; + return make_pair(tag, at_or_below_cutoff); + } + } + // Slow path + last_tag_ = ReadTagFallback(); + return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff); +} + inline bool CodedInputStream::LastTagWas(uint32 expected) { return last_tag_ == expected; } @@ -1029,6 +1104,15 @@ inline void CodedOutputStream::WriteString(const string& str) { WriteRaw(str.data(), static_cast<int>(str.size())); } +inline void CodedOutputStream::WriteRawMaybeAliased( + const void* data, int size) { + if (aliasing_enabled_) { + WriteAliasedRaw(data, size); + } else { + WriteRaw(data, size); + } +} + inline uint8* CodedOutputStream::WriteStringToArray( const string& str, uint8* target) { return WriteRawToArray(str.data(), static_cast<int>(str.size()), target); diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h index 144f44f0..41dc10e3 100644 --- a/src/google/protobuf/io/coded_stream_inl.h +++ b/src/google/protobuf/io/coded_stream_inl.h @@ -37,6 +37,7 @@ #define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__ #include <google/protobuf/io/coded_stream.h> +#include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include <string> #include <google/protobuf/stubs/stl_util.h> @@ -53,7 +54,7 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer, // When buffer is empty, string_as_array(buffer) will return NULL but memcpy // requires non-NULL pointers even when size is 0. Hench this check. 
if (size > 0) { - memcpy(string_as_array(buffer), buffer_, size); + memcpy(mutable_string_data(buffer), buffer_, size); Advance(size); } return true; diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc index 2daab194..b39987ca 100644 --- a/src/google/protobuf/io/coded_stream_unittest.cc +++ b/src/google/protobuf/io/coded_stream_unittest.cc @@ -144,6 +144,10 @@ uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize]; // checks. const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; +// In several ReadCord test functions, we either clear the Cord before ReadCord +// calls or not. +const bool kResetCords[] = {false, true}; + // ------------------------------------------------------------------- // Varint tests. @@ -682,6 +686,191 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) { EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); } +TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes)); + EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit()); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes), + coded_input.BytesUntilTotalBytesLimit()); + EXPECT_EQ(kRawBytes, str); + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). 
+ EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). + EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + // Note: this check depends on string class implementation. It + // expects that string will allocate more than strlen(kRawBytes) + // if the content of kRawBytes is appended to string in small + // chunks. + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). + EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. 
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, -1)); + // Note: this check depends on string class implementation. It + // expects that string will always allocate the same amount of + // memory for an empty string. + EXPECT_EQ(string().capacity(), str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); + EXPECT_GT(1 << 30, str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(16); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. 
+ EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(16, 16); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, + ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + coded_input.SetTotalBytesLimit(16, 16); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. 
+ EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, + ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(16); + coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_)); + EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit()); + + string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + // ------------------------------------------------------------------- // Skip @@ -980,9 +1169,11 @@ TEST_F(CodedStreamTest, TotalBytesLimit) { ArrayInputStream input(buffer_, sizeof(buffer_)); CodedInputStream coded_input(&input); coded_input.SetTotalBytesLimit(16, -1); + EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); string str; EXPECT_TRUE(coded_input.ReadString(&str, 16)); + EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); vector<string> errors; @@ -997,7 +1188,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) { "A protocol message was rejected because it was too big", errors[0]); coded_input.SetTotalBytesLimit(32, -1); + EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); EXPECT_TRUE(coded_input.ReadString(&str, 16)); + EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); } TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) { diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h index 365e9ea5..7ee24bc3 100644 --- 
a/src/google/protobuf/io/gzip_stream.h +++ b/src/google/protobuf/io/gzip_stream.h @@ -118,7 +118,7 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream { ZLIB = 2, }; - struct LIBPROTOBUF_EXPORT Options { + struct Options { // Defaults to GZIP. Format format; diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc index c9b30359..76fb9442 100644 --- a/src/google/protobuf/io/printer_unittest.cc +++ b/src/google/protobuf/io/printer_unittest.cc @@ -231,7 +231,7 @@ TEST(Printer, Death) { EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name"); EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent"); } -#endif // PROTOBUF__HAS_DEATH_TEST +#endif // PROTOBUF_HAS_DEATH_TEST TEST(Printer, WriteFailurePartial) { char buffer[17]; diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc new file mode 100644 index 00000000..cf0c6e10 --- /dev/null +++ b/src/google/protobuf/io/strtod.cc @@ -0,0 +1,113 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <google/protobuf/io/strtod.h> + +#include <cstdio> +#include <cstring> +#include <string> + +#include <google/protobuf/stubs/common.h> + +namespace google { +namespace protobuf { +namespace io { + +// ---------------------------------------------------------------------- +// NoLocaleStrtod() +// This code will make you cry. +// ---------------------------------------------------------------------- + +namespace { + +// Returns a string identical to *input except that the character pointed to +// by radix_pos (which should be '.') is replaced with the locale-specific +// radix character. +string LocalizeRadix(const char* input, const char* radix_pos) { + // Determine the locale-specific radix character by calling sprintf() to + // print the number 1.5, then stripping off the digits. As far as I can + // tell, this is the only portable, thread-safe way to get the C library + // to divuldge the locale's radix character. No, localeconv() is NOT + // thread-safe. + char temp[16]; + int size = sprintf(temp, "%.1f", 1.5); + GOOGLE_CHECK_EQ(temp[0], '1'); + GOOGLE_CHECK_EQ(temp[size-1], '5'); + GOOGLE_CHECK_LE(size, 6); + + // Now replace the '.' in the input with it. 
+ string result; + result.reserve(strlen(input) + size - 3); + result.append(input, radix_pos); + result.append(temp + 1, size - 2); + result.append(radix_pos + 1); + return result; +} + +} // namespace + +double NoLocaleStrtod(const char* text, char** original_endptr) { + // We cannot simply set the locale to "C" temporarily with setlocale() + // as this is not thread-safe. Instead, we try to parse in the current + // locale first. If parsing stops at a '.' character, then this is a + // pretty good hint that we're actually in some other locale in which + // '.' is not the radix character. + + char* temp_endptr; + double result = strtod(text, &temp_endptr); + if (original_endptr != NULL) *original_endptr = temp_endptr; + if (*temp_endptr != '.') return result; + + // Parsing halted on a '.'. Perhaps we're in a different locale? Let's + // try to replace the '.' with a locale-specific radix character and + // try again. + string localized = LocalizeRadix(text, temp_endptr); + const char* localized_cstr = localized.c_str(); + char* localized_endptr; + result = strtod(localized_cstr, &localized_endptr); + if ((localized_endptr - localized_cstr) > + (temp_endptr - text)) { + // This attempt got further, so replacing the decimal must have helped. + // Update original_endptr to point at the right location. + if (original_endptr != NULL) { + // size_diff is non-zero if the localized radix has multiple bytes. + int size_diff = localized.size() - strlen(text); + // const_cast is necessary to match the strtod() interface. 
+ *original_endptr = const_cast<char*>( + text + (localized_endptr - localized_cstr - size_diff)); + } + } + + return result; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h new file mode 100644 index 00000000..2be3c43d --- /dev/null +++ b/src/google/protobuf/io/strtod.h @@ -0,0 +1,50 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A locale-independent version of strtod(), used to parse floating +// point default values in .proto files, where the decimal separator +// is always a dot. + +#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__ +#define GOOGLE_PROTOBUF_IO_STRTOD_H__ + +namespace google { +namespace protobuf { +namespace io { + +// A locale-independent version of the standard strtod(), which always +// uses a dot as the decimal separator. +double NoLocaleStrtod(const char* str, char** endptr); + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__ diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index a022b71d..d149305a 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -91,6 +91,7 @@ #include <google/protobuf/io/tokenizer.h> #include <google/protobuf/stubs/common.h> #include <google/protobuf/stubs/stringprintf.h> +#include <google/protobuf/io/strtod.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/strutil.h> #include <google/protobuf/stubs/stl_util.h> @@ -195,7 +196,9 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input, record_target_(NULL), record_start_(-1), allow_f_after_float_(false), - comment_style_(CPP_COMMENT_STYLE) { + comment_style_(CPP_COMMENT_STYLE), + require_space_after_number_(true), + allow_multiline_strings_(false) { current_.line = 0; current_.column = 0; @@ -350,9 
+353,16 @@ void Tokenizer::ConsumeString(char delimiter) { while (true) { switch (current_char_) { case '\0': - case '\n': { - AddError("String literals cannot cross line boundaries."); + AddError("Unexpected end of string."); return; + + case '\n': { + if (!allow_multiline_strings_) { + AddError("String literals cannot cross line boundaries."); + return; + } + NextChar(); + break; } case '\\': { @@ -449,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, } } - if (LookingAt<Letter>()) { + if (LookingAt<Letter>() && require_space_after_number_) { AddError("Need space between number and identifier."); } else if (current_char_ == '.') { if (is_float) { @@ -618,6 +628,12 @@ bool Tokenizer::Next() { ConsumeString('\''); current_.type = TYPE_STRING; } else { + // Check if the high order bit is set. + if (current_char_ & 0x80) { + error_collector_->AddError(line_, column_, + StringPrintf("Interpreting non ascii codepoint %d.", + static_cast<unsigned char>(current_char_))); + } NextChar(); current_.type = TYPE_SYMBOL; } @@ -1086,6 +1102,26 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) { } } +template<typename CharacterClass> +static bool AllInClass(const string& s) { + for (int i = 0; i < s.size(); ++i) { + if (!CharacterClass::InClass(s[i])) + return false; + } + return true; +} + +bool Tokenizer::IsIdentifier(const string& text) { + // Mirrors IDENTIFIER definition in Tokenizer::Next() above. + if (text.size() == 0) + return false; + if (!Letter::InClass(text.at(0))) + return false; + if (!AllInClass<Alphanumeric>(text.substr(1))) + return false; + return true; +} + } // namespace io } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h index 6b727d9f..2f07116e 100644 --- a/src/google/protobuf/io/tokenizer.h +++ b/src/google/protobuf/io/tokenizer.h @@ -229,6 +229,21 @@ class LIBPROTOBUF_EXPORT Tokenizer { // Sets the comment style. 
void set_comment_style(CommentStyle style) { comment_style_ = style; } + // Whether to require whitespace between a number and a field name. + // Default is true. Do not use this; for Google-internal cleanup only. + void set_require_space_after_number(bool require) { + require_space_after_number_ = require; + } + + // Whether to allow string literals to span multiple lines. Default is false. + // Do not use this; for Google-internal cleanup only. + void set_allow_multiline_strings(bool allow) { + allow_multiline_strings_ = allow; + } + + // External helper: validate an identifier. + static bool IsIdentifier(const string& text); + // ----------------------------------------------------------------- private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); @@ -259,6 +274,8 @@ class LIBPROTOBUF_EXPORT Tokenizer { // Options. bool allow_f_after_float_; CommentStyle comment_style_; + bool require_space_after_number_; + bool allow_multiline_strings_; // Since we count columns we need to interpret tabs somehow. We'll take // the standard 8-character definition for lack of any way to do better. diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index dbb5be4f..b39279b7 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -411,12 +411,6 @@ MultiTokenCase kMultiTokenCases[] = { { Tokenizer::TYPE_END , "" , 1, 3, 3 }, }}, - // Bytes with the high-order bit set should not be seen as control characters. 
- { "\300", { - { Tokenizer::TYPE_SYMBOL, "\300", 0, 0, 1 }, - { Tokenizer::TYPE_END , "" , 0, 1, 1 }, - }}, - // Test all whitespace chars { "foo\n\t\r\v\fbar", { { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, @@ -741,7 +735,7 @@ TEST_F(TokenizerTest, ParseInteger) { EXPECT_EQ(0, ParseInteger("0x")); uint64 i; -#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet +#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet // Test invalid integers that will never be tokenized as integers. EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i), "passed text that could not have been tokenized as an integer"); @@ -753,7 +747,7 @@ TEST_F(TokenizerTest, ParseInteger) { "passed text that could not have been tokenized as an integer"); EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i), "passed text that could not have been tokenized as an integer"); -#endif // PROTOBUF_HASDEATH_TEST +#endif // PROTOBUF_HAS_DEATH_TEST // Test overflows. EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i)); @@ -796,7 +790,7 @@ TEST_F(TokenizerTest, ParseFloat) { EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999")); EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999")); -#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet +#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet // Test invalid integers that will never be tokenized as integers. 
EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"), "passed text that could not have been tokenized as a float"); @@ -804,7 +798,7 @@ TEST_F(TokenizerTest, ParseFloat) { "passed text that could not have been tokenized as a float"); EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"), "passed text that could not have been tokenized as a float"); -#endif // PROTOBUF_HASDEATH_TEST +#endif // PROTOBUF_HAS_DEATH_TEST } TEST_F(TokenizerTest, ParseString) { @@ -843,10 +837,10 @@ TEST_F(TokenizerTest, ParseString) { EXPECT_EQ("u0", output); // Test invalid strings that will never be tokenized as strings. -#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet +#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output), "passed text that could not have been tokenized as a string"); -#endif // PROTOBUF_HASDEATH_TEST +#endif // PROTOBUF_HAS_DEATH_TEST } TEST_F(TokenizerTest, ParseStringAppend) { @@ -883,7 +877,7 @@ ErrorCase kErrorCases[] = { { "'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n" }, { "'foo", false, - "0:4: String literals cannot cross line boundaries.\n" }, + "0:4: Unexpected end of string.\n" }, { "'bar\nfoo", true, "0:4: String literals cannot cross line boundaries.\n" }, { "'\\u01' foo", true, @@ -951,6 +945,10 @@ ErrorCase kErrorCases[] = { "0:0: Invalid control characters encountered in text.\n" }, { string("\0\0foo", 5), true, "0:0: Invalid control characters encountered in text.\n" }, + + // Check error from high order bits set + { "\300foo", true, + "0:0: Interpreting non ascii codepoint 192.\n" }, }; TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) { diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc index dad6ff14..4d53f29b 100644 --- a/src/google/protobuf/io/zero_copy_stream.cc +++ b/src/google/protobuf/io/zero_copy_stream.cc @@ -34,6 +34,7 @@ #include <google/protobuf/io/zero_copy_stream.h> 
+#include <google/protobuf/stubs/common.h> namespace google { namespace protobuf { @@ -43,6 +44,14 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {} ZeroCopyOutputStream::~ZeroCopyOutputStream() {} +bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */, + int /* size */) { + GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. " + "Reaching here usually means a ZeroCopyOutputStream " + "implementation bug."; + return false; +} + } // namespace io } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h index db5326f7..f8921228 100644 --- a/src/google/protobuf/io/zero_copy_stream.h +++ b/src/google/protobuf/io/zero_copy_stream.h @@ -226,6 +226,16 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { // Returns the total number of bytes written since this object was created. virtual int64 ByteCount() const = 0; + // Write a given chunk of data to the output. Some output streams may + // implement this in a way that avoids copying. Check AllowsAliasing() before + // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is + // called on a stream that does not allow aliasing. + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. 
+ virtual bool WriteAliasedRaw(const void* data, int size); + virtual bool AllowsAliasing() const { return false; } + private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream); diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc index 9fcbb622..7829a29f 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.cc +++ b/src/google/protobuf/io/zero_copy_stream_impl.cc @@ -413,7 +413,9 @@ int64 ConcatenatingInputStream::ByteCount() const { LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input, int64 limit) - : input_(input), limit_(limit) {} + : input_(input), limit_(limit) { + prior_bytes_read_ = input_->ByteCount(); +} LimitingInputStream::~LimitingInputStream() { // If we overshot the limit, back up. @@ -457,9 +459,9 @@ bool LimitingInputStream::Skip(int count) { int64 LimitingInputStream::ByteCount() const { if (limit_ < 0) { - return input_->ByteCount() + limit_; + return input_->ByteCount() + limit_ - prior_bytes_read_; } else { - return input_->ByteCount(); + return input_->ByteCount() - prior_bytes_read_; } } diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h index 9fedb005..83827097 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.h +++ b/src/google/protobuf/io/zero_copy_stream_impl.h @@ -344,6 +344,7 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream { private: ZeroCopyInputStream* input_; int64 limit_; // Decreases as we go, becomes negative if we overshoot. 
+ int64 prior_bytes_read_; // Bytes read on underlying stream at construction GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); }; diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc index b3a71ce3..d186a98b 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc +++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc @@ -33,10 +33,12 @@ // Sanjay Ghemawat, Jeff Dean, and others. #include <google/protobuf/io/zero_copy_stream_impl_lite.h> -#include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stl_util.h> #include <algorithm> +#include <limits> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/stl_util.h> namespace google { namespace protobuf { @@ -161,15 +163,23 @@ bool StringOutputStream::Next(void** data, int* size) { // without a memory allocation this way. STLStringResizeUninitialized(target_, target_->capacity()); } else { - // Size has reached capacity, so double the size. Also make sure - // that the new size is at least kMinimumSize. + // Size has reached capacity, try to double the size. + if (old_size > std::numeric_limits<int>::max() / 2) { + // Cannot double the size; doing so would cause integer + // overflow in the expression below: old_size * 2. + GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for " + << "StringOutputStream."; + return false; + } + // Double the size, also make sure that the new size is at least + // kMinimumSize. STLStringResizeUninitialized( target_, max(old_size * 2, kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. 
} - *data = string_as_array(target_) + old_size; + *data = mutable_string_data(target_) + old_size; *size = target_->size() - old_size; return true; } diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h index 153f543e..b980143e 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h +++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h @@ -48,6 +48,7 @@ #include <iosfwd> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/stl_util.h> namespace google { @@ -333,6 +334,19 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea // =================================================================== +// Return a pointer to mutable characters underlying the given string. The +// return value is valid until the next time the string is resized. We +// trust the caller to treat the return value as an array of length s->size(). +inline char* mutable_string_data(string* s) { +#ifdef LANG_CXX11 + // This should be simpler & faster than string_as_array() because the latter + // is guaranteed to return NULL when *s is empty, so it has to check for that. + return &(*s)[0]; +#else + return string_as_array(s); +#endif +} + } // namespace io } // namespace protobuf diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc index 6f155df7..75eb2a43 100644 --- a/src/google/protobuf/io/zero_copy_stream_unittest.cc +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -560,9 +560,10 @@ TEST_F(IoTest, CompressionOptions) { // Some ad-hoc testing of compression options. 
string golden; - File::ReadFileToStringOrDie( - TestSourceDir() + "/google/protobuf/testdata/golden_message", - &golden); + GOOGLE_CHECK_OK(File::GetContents( + TestSourceDir() + + "/google/protobuf/testdata/golden_message", + &golden, true)); GzipOutputStream::Options options; string gzip_compressed = Compress(golden, options); @@ -923,6 +924,26 @@ TEST_F(IoTest, LimitingInputStream) { ReadStuff(&input); } +// Checks that ByteCount works correctly for LimitingInputStreams where the +// underlying stream has already been read. +TEST_F(IoTest, LimitingInputStreamByteCount) { + const int kHalfBufferSize = 128; + const int kBufferSize = kHalfBufferSize * 2; + uint8 buffer[kBufferSize]; + + // Set up input. Only allow half to be read at once. + ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize); + const void* data; + int size; + EXPECT_TRUE(array_input.Next(&data, &size)); + EXPECT_EQ(kHalfBufferSize, array_input.ByteCount()); + // kHalfBufferSize - 1 to test limiting logic as well. + LimitingInputStream input(&array_input, kHalfBufferSize - 1); + EXPECT_EQ(0, input.ByteCount()); + EXPECT_TRUE(input.Next(&data, &size)); + EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount()); +} + // Check that a zero-size array doesn't confuse the code. TEST(ZeroSizeArray, Input) { ArrayInputStream input(NULL, 0); |