aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/io
diff options
context:
space:
mode:
authorGravatar jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2014-07-18 00:47:59 +0000
committerGravatar jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2014-07-18 00:47:59 +0000
commit4de8f55113007fdc8e34107950e605fc0209d465 (patch)
tree92b7da8757a7740d9e1f2d3ead233542947d8c8c /src/google/protobuf/io
parentc5553a3d18f80132b9079c5504bc0aa1f7f950a0 (diff)
down integrate to svn
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r--src/google/protobuf/io/coded_stream.cc101
-rw-r--r--src/google/protobuf/io/coded_stream.h94
-rw-r--r--src/google/protobuf/io/coded_stream_inl.h3
-rw-r--r--src/google/protobuf/io/coded_stream_unittest.cc193
-rw-r--r--src/google/protobuf/io/gzip_stream.h2
-rw-r--r--src/google/protobuf/io/printer_unittest.cc2
-rw-r--r--src/google/protobuf/io/strtod.cc113
-rw-r--r--src/google/protobuf/io/strtod.h50
-rw-r--r--src/google/protobuf/io/tokenizer.cc44
-rw-r--r--src/google/protobuf/io/tokenizer.h17
-rw-r--r--src/google/protobuf/io/tokenizer_unittest.cc24
-rw-r--r--src/google/protobuf/io/zero_copy_stream.cc9
-rw-r--r--src/google/protobuf/io/zero_copy_stream.h10
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl.cc8
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl.h1
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl_lite.cc20
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl_lite.h14
-rw-r--r--src/google/protobuf/io/zero_copy_stream_unittest.cc27
18 files changed, 674 insertions, 58 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index 36add8c3..f6a84533 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -83,6 +83,10 @@ CodedInputStream::~CodedInputStream() {
int CodedInputStream::default_recursion_limit_ = 100;
+void CodedOutputStream::EnableAliasing(bool enabled) {
+ aliasing_enabled_ = enabled && output_->AllowsAliasing();
+}
+
void CodedInputStream::BackUpInputToCurrentPosition() {
int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
if (backup_bytes > 0) {
@@ -167,6 +171,11 @@ void CodedInputStream::SetTotalBytesLimit(
RecomputeBufferLimits();
}
+int CodedInputStream::BytesUntilTotalBytesLimit() const {
+ if (total_bytes_limit_ == INT_MAX) return -1;
+ return total_bytes_limit_ - CurrentPosition();
+}
+
void CodedInputStream::PrintTotalBytesLimitError() {
GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
"big (more than " << total_bytes_limit_
@@ -247,6 +256,14 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
buffer->clear();
}
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ if (closest_limit != INT_MAX) {
+ int bytes_to_limit = closest_limit - CurrentPosition();
+ if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
+ buffer->reserve(size);
+ }
+ }
+
int current_buffer_size;
while ((current_buffer_size = BufferSize()) < size) {
// Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
@@ -313,11 +330,16 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
uint32 b;
uint32 result;
- b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
- b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result = b ; if (!(b & 0x80)) goto done;
+ result -= 0x80;
+ b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 7;
+ b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 14;
+ b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 21;
+ b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done;
+ // "result -= 0x80 << 28" is irrevelant.
// If the input is larger than 32 bits, we still need to read it all
// and discard the high-order bits.
@@ -347,8 +369,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) {
bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
if (BufferSize() >= kMaxVarintBytes ||
- // Optimization: If the varint ends at exactly the end of the buffer,
- // we can detect that and still use the fast path.
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
(buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
const uint8* end = ReadVarint32FromArray(buffer_, value);
if (end == NULL) return false;
@@ -391,8 +413,8 @@ uint32 CodedInputStream::ReadTagSlow() {
uint32 CodedInputStream::ReadTagFallback() {
const int buf_size = BufferSize();
if (buf_size >= kMaxVarintBytes ||
- // Optimization: If the varint ends at exactly the end of the buffer,
- // we can detect that and still use the fast path.
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
(buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
uint32 tag;
const uint8* end = ReadVarint32FromArray(buffer_, &tag);
@@ -444,8 +466,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
if (BufferSize() >= kMaxVarintBytes ||
- // Optimization: If the varint ends at exactly the end of the buffer,
- // we can detect that and still use the fast path.
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
(buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
// Fast path: We have enough bytes left in the buffer to guarantee that
// this read won't cross the end, so we can skip the checks.
@@ -457,16 +479,26 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
// processors.
uint32 part0 = 0, part1 = 0, part2 = 0;
- b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
- b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
- b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
- b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
- b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
- b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
- b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
- b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
- b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
- b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done;
+ part0 -= 0x80;
+ b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 7;
+ b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 14;
+ b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 21;
+ b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done;
+ part1 -= 0x80;
+ b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 7;
+ b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 14;
+ b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 21;
+ b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done;
+ part2 -= 0x80;
+ b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done;
+ // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
// We have overrun the maximum size of a varint (10 bytes). The data
// must be corrupt.
@@ -555,7 +587,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
buffer_(NULL),
buffer_size_(0),
total_bytes_(0),
- had_error_(false) {
+ had_error_(false),
+ aliasing_enabled_(false) {
// Eagerly Refresh() so buffer space is immediately available.
Refresh();
// The Refresh() may have failed. If the client doesn't write any data,
@@ -609,6 +642,23 @@ uint8* CodedOutputStream::WriteRawToArray(
}
+void CodedOutputStream::WriteAliasedRaw(const void* data, int size) {
+ if (size < buffer_size_
+ ) {
+ WriteRaw(data, size);
+ } else {
+ if (buffer_size_ > 0) {
+ output_->BackUp(buffer_size_);
+ total_bytes_ -= buffer_size_;
+ buffer_ = NULL;
+ buffer_size_ = 0;
+ }
+
+ total_bytes_ += size;
+ had_error_ |= !output_->WriteAliasedRaw(data, size);
+ }
+}
+
void CodedOutputStream::WriteLittleEndian32(uint32 value) {
uint8 bytes[sizeof(value)];
@@ -852,6 +902,13 @@ int CodedOutputStream::VarintSize64(uint64 value) {
}
}
+uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
+ uint8* target) {
+ GOOGLE_DCHECK_LE(str.size(), kuint32max);
+ target = WriteVarint32ToArray(str.size(), target);
+ return WriteStringToArray(str, target);
+}
+
} // namespace io
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index 66cbee00..50a03a11 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -233,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Read a tag. This calls ReadVarint32() and returns the result, or returns
// zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
// the last tag value, which can be checked with LastTagWas().
- // Always inline because this is only called in once place per parse loop
+ // Always inline because this is only called in one place per parse loop
// but it is called for every iteration of said loop, so it should be fast.
// GCC doesn't want to inline this by default.
uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+ // This usually a faster alternative to ReadTag() when cutoff is a manifest
+ // constant. It does particularly well for cutoff >= 127. The first part
+ // of the return value is the tag that was read, though it can also be 0 in
+ // the cases where ReadTag() would return 0. If the second part is true
+ // then the tag is known to be in [0, cutoff]. If not, the tag either is
+ // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
+ // because that can arise in several ways, and for best performance we want
+ // to avoid an extra "is tag == 0?" check here.)
+ inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
// Usually returns true if calling ReadVarint32() now would produce the given
// value. Will always return false if ReadVarint32() would not return the
// given value. If ExpectTag() returns true, it also advances past
@@ -264,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// zero, and ConsumedEntireMessage() will return true.
bool ExpectAtEnd();
- // If the last call to ReadTag() returned the given value, returns true.
- // Otherwise, returns false;
+ // If the last call to ReadTag() or ReadTagWithCutoff() returned the
+ // given value, returns true. Otherwise, returns false;
//
// This is needed because parsers for some types of embedded messages
// (with field type TYPE_GROUP) don't actually know that they've reached the
@@ -333,7 +344,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// cause integer overflows is 512MB. The default limit is 64MB. Apps
// should set shorter limits if possible. If warning_threshold is not -1,
// a warning will be printed to stderr after warning_threshold bytes are
- // read. For backwards compatibility all negative values get squached to -1,
+ // read. For backwards compatibility all negative values get squashed to -1,
// as other negative values might have special internal meanings.
// An error will always be printed to stderr if the limit is reached.
//
@@ -356,6 +367,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// something unusual.
void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
+ // The Total Bytes Limit minus the Current Position, or -1 if there
+ // is no Total Bytes Limit.
+ int BytesUntilTotalBytesLimit() const;
+
// Recursion Limit -------------------------------------------------
// To prevent corrupt or malicious messages from causing stack overflows,
// we must keep track of the depth of recursion when parsing embedded
@@ -466,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
int overflow_bytes_;
// LastTagWas() stuff.
- uint32 last_tag_; // result of last ReadTag().
+ uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
// This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
// at EOF, or by ExpectAtEnd() when it returns true. This happens when we
@@ -638,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// Write raw bytes, copying them from the given buffer.
void WriteRaw(const void* buffer, int size);
+ // Like WriteRaw() but will try to write aliased data if aliasing is
+ // turned on.
+ void WriteRawMaybeAliased(const void* data, int size);
// Like WriteRaw() but writing directly to the target array.
// This is _not_ inlined, as the compiler often optimizes memcpy into inline
// copy loops. Since this gets called by every field with string or bytes
@@ -649,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
void WriteString(const string& str);
// Like WriteString() but writing directly to the target array.
static uint8* WriteStringToArray(const string& str, uint8* target);
+ // Write the varint-encoded size of str followed by str.
+ static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
+ // Instructs the CodedOutputStream to allow the underlying
+ // ZeroCopyOutputStream to hold pointers to the original structure instead of
+ // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
+ // underlying stream does not support aliasing, then enabling it has no
+ // affect. For now, this only affects the behavior of
+ // WriteRawMaybeAliased().
+ //
+ // NOTE: It is caller's responsibility to ensure that the chunk of memory
+ // remains live until all of the data has been consumed from the stream.
+ void EnableAliasing(bool enabled);
+
// Write a 32-bit little-endian integer.
void WriteLittleEndian32(uint32 value);
// Like WriteLittleEndian32() but writing directly to the target array.
@@ -725,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
int buffer_size_;
int total_bytes_; // Sum of sizes of all buffers seen so far.
bool had_error_; // Whether an error occurred during output.
+ bool aliasing_enabled_; // See EnableAliasing().
// Advance the buffer by a given number of bytes.
void Advance(int amount);
@@ -733,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// Advance(buffer_size_).
bool Refresh();
+ // Like WriteRaw() but may avoid copying if the underlying
+ // ZeroCopyOutputStream supports it.
+ void WriteAliasedRaw(const void* buffer, int size);
+
static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
// Always-inlined versions of WriteVarint* functions so that code can be
@@ -850,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() {
}
}
+inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
+ uint32 cutoff) {
+ // In performance-sensitive code we can expect cutoff to be a compile-time
+ // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
+ // compile time.
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+ // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
+ // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
+ // is large enough then is it better to check for the two-byte case first?
+ if (static_cast<int8>(buffer_[0]) > 0) {
+ const uint32 kMax1ByteVarint = 0x7f;
+ uint32 tag = last_tag_ = buffer_[0];
+ Advance(1);
+ return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
+ }
+ // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
+ // and tag is two bytes. The latter is tested by bitwise-and-not of the
+ // first byte and the second byte.
+ if (cutoff >= 0x80 &&
+ GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
+ GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
+ const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
+ uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
+ Advance(2);
+ // It might make sense to test for tag == 0 now, but it is so rare that
+ // that we don't bother. A varint-encoded 0 should be one byte unless
+ // the encoder lost its mind. The second part of the return value of
+ // this function is allowed to be either true or false if the tag is 0,
+ // so we don't have to check for tag == 0. We may need to check whether
+ // it exceeds cutoff.
+ bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
+ return make_pair(tag, at_or_below_cutoff);
+ }
+ }
+ // Slow path
+ last_tag_ = ReadTagFallback();
+ return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
+}
+
inline bool CodedInputStream::LastTagWas(uint32 expected) {
return last_tag_ == expected;
}
@@ -1029,6 +1104,15 @@ inline void CodedOutputStream::WriteString(const string& str) {
WriteRaw(str.data(), static_cast<int>(str.size()));
}
+inline void CodedOutputStream::WriteRawMaybeAliased(
+ const void* data, int size) {
+ if (aliasing_enabled_) {
+ WriteAliasedRaw(data, size);
+ } else {
+ WriteRaw(data, size);
+ }
+}
+
inline uint8* CodedOutputStream::WriteStringToArray(
const string& str, uint8* target) {
return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
index 144f44f0..41dc10e3 100644
--- a/src/google/protobuf/io/coded_stream_inl.h
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -37,6 +37,7 @@
#define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include <string>
#include <google/protobuf/stubs/stl_util.h>
@@ -53,7 +54,7 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer,
// When buffer is empty, string_as_array(buffer) will return NULL but memcpy
// requires non-NULL pointers even when size is 0. Hench this check.
if (size > 0) {
- memcpy(string_as_array(buffer), buffer_, size);
+ memcpy(mutable_string_data(buffer), buffer_, size);
Advance(size);
}
return true;
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index 2daab194..b39987ca 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -144,6 +144,10 @@ uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize];
// checks.
const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+// In several ReadCord test functions, we either clear the Cord before ReadCord
+// calls or not.
+const bool kResetCords[] = {false, true};
+
// -------------------------------------------------------------------
// Varint tests.
@@ -682,6 +686,191 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
}
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes));
+ EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit());
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes),
+ coded_input.BytesUntilTotalBytesLimit());
+ EXPECT_EQ(kRawBytes, str);
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate more than strlen(kRawBytes)
+ // if the content of kRawBytes is appended to string in small
+ // chunks.
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, -1));
+ // Note: this check depends on string class implementation. It
+ // expects that string will always allocate the same amount of
+ // memory for an empty string.
+ EXPECT_EQ(string().capacity(), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+ EXPECT_GT(1 << 30, str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(16, 16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest,
+ ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+ coded_input.SetTotalBytesLimit(16, 16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest,
+ ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // Buffer size in the input must be smaller than sizeof(kRawBytes),
+ // otherwise check against capacity will fail as ReadStringInline()
+ // will handle the reading and will reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(16);
+ coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_));
+ EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit());
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
// -------------------------------------------------------------------
// Skip
@@ -980,9 +1169,11 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
ArrayInputStream input(buffer_, sizeof(buffer_));
CodedInputStream coded_input(&input);
coded_input.SetTotalBytesLimit(16, -1);
+ EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
string str;
EXPECT_TRUE(coded_input.ReadString(&str, 16));
+ EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
vector<string> errors;
@@ -997,7 +1188,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
"A protocol message was rejected because it was too big", errors[0]);
coded_input.SetTotalBytesLimit(32, -1);
+ EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
EXPECT_TRUE(coded_input.ReadString(&str, 16));
+ EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
}
TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
index 365e9ea5..7ee24bc3 100644
--- a/src/google/protobuf/io/gzip_stream.h
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -118,7 +118,7 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
ZLIB = 2,
};
- struct LIBPROTOBUF_EXPORT Options {
+ struct Options {
// Defaults to GZIP.
Format format;
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index c9b30359..76fb9442 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -231,7 +231,7 @@ TEST(Printer, Death) {
EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
}
-#endif // PROTOBUF__HAS_DEATH_TEST
+#endif // PROTOBUF_HAS_DEATH_TEST
TEST(Printer, WriteFailurePartial) {
char buffer[17];
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
new file mode 100644
index 00000000..cf0c6e10
--- /dev/null
+++ b/src/google/protobuf/io/strtod.cc
@@ -0,0 +1,113 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/io/strtod.h>
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ----------------------------------------------------------------------
+// NoLocaleStrtod()
+// This code will make you cry.
+// ----------------------------------------------------------------------
+
+namespace {
+
+// Returns a string identical to *input except that the character pointed to
+// by radix_pos (which should be '.') is replaced with the locale-specific
+// radix character.
+string LocalizeRadix(const char* input, const char* radix_pos) {
+ // Determine the locale-specific radix character by calling sprintf() to
+ // print the number 1.5, then stripping off the digits. As far as I can
+ // tell, this is the only portable, thread-safe way to get the C library
+ // to divuldge the locale's radix character. No, localeconv() is NOT
+ // thread-safe.
+ char temp[16];
+ int size = sprintf(temp, "%.1f", 1.5);
+ GOOGLE_CHECK_EQ(temp[0], '1');
+ GOOGLE_CHECK_EQ(temp[size-1], '5');
+ GOOGLE_CHECK_LE(size, 6);
+
+ // Now replace the '.' in the input with it.
+ string result;
+ result.reserve(strlen(input) + size - 3);
+ result.append(input, radix_pos);
+ result.append(temp + 1, size - 2);
+ result.append(radix_pos + 1);
+ return result;
+}
+
+} // namespace
+
+double NoLocaleStrtod(const char* text, char** original_endptr) {
+ // We cannot simply set the locale to "C" temporarily with setlocale()
+ // as this is not thread-safe. Instead, we try to parse in the current
+ // locale first. If parsing stops at a '.' character, then this is a
+ // pretty good hint that we're actually in some other locale in which
+ // '.' is not the radix character.
+
+ char* temp_endptr;
+ double result = strtod(text, &temp_endptr);
+ if (original_endptr != NULL) *original_endptr = temp_endptr;
+ if (*temp_endptr != '.') return result;
+
+ // Parsing halted on a '.'. Perhaps we're in a different locale? Let's
+ // try to replace the '.' with a locale-specific radix character and
+ // try again.
+ string localized = LocalizeRadix(text, temp_endptr);
+ const char* localized_cstr = localized.c_str();
+ char* localized_endptr;
+ result = strtod(localized_cstr, &localized_endptr);
+ if ((localized_endptr - localized_cstr) >
+ (temp_endptr - text)) {
+ // This attempt got further, so replacing the decimal must have helped.
+ // Update original_endptr to point at the right location.
+ if (original_endptr != NULL) {
+ // size_diff is non-zero if the localized radix has multiple bytes.
+ int size_diff = localized.size() - strlen(text);
+ // const_cast is necessary to match the strtod() interface.
+ *original_endptr = const_cast<char*>(
+ text + (localized_endptr - localized_cstr - size_diff));
+ }
+ }
+
+ return result;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
new file mode 100644
index 00000000..2be3c43d
--- /dev/null
+++ b/src/google/protobuf/io/strtod.h
@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A locale-independent version of strtod(), used to parse floating
+// point default values in .proto files, where the decimal separator
+// is always a dot.
+
+#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__
+#define GOOGLE_PROTOBUF_IO_STRTOD_H__
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A locale-independent version of the standard strtod(), which always
+// uses a dot as the decimal separator.
+double NoLocaleStrtod(const char* str, char** endptr);
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index a022b71d..d149305a 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -91,6 +91,7 @@
#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/stl_util.h>
@@ -195,7 +196,9 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
record_target_(NULL),
record_start_(-1),
allow_f_after_float_(false),
- comment_style_(CPP_COMMENT_STYLE) {
+ comment_style_(CPP_COMMENT_STYLE),
+ require_space_after_number_(true),
+ allow_multiline_strings_(false) {
current_.line = 0;
current_.column = 0;
@@ -350,9 +353,16 @@ void Tokenizer::ConsumeString(char delimiter) {
while (true) {
switch (current_char_) {
case '\0':
- case '\n': {
- AddError("String literals cannot cross line boundaries.");
+ AddError("Unexpected end of string.");
return;
+
+ case '\n': {
+ if (!allow_multiline_strings_) {
+ AddError("String literals cannot cross line boundaries.");
+ return;
+ }
+ NextChar();
+ break;
}
case '\\': {
@@ -449,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
}
}
- if (LookingAt<Letter>()) {
+ if (LookingAt<Letter>() && require_space_after_number_) {
AddError("Need space between number and identifier.");
} else if (current_char_ == '.') {
if (is_float) {
@@ -618,6 +628,12 @@ bool Tokenizer::Next() {
ConsumeString('\'');
current_.type = TYPE_STRING;
} else {
+ // Check if the high order bit is set.
+ if (current_char_ & 0x80) {
+ error_collector_->AddError(line_, column_,
+ StringPrintf("Interpreting non ascii codepoint %d.",
+ static_cast<unsigned char>(current_char_)));
+ }
NextChar();
current_.type = TYPE_SYMBOL;
}
@@ -1086,6 +1102,26 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
}
}
+template<typename CharacterClass>
+static bool AllInClass(const string& s) {
+ for (int i = 0; i < s.size(); ++i) {
+ if (!CharacterClass::InClass(s[i]))
+ return false;
+ }
+ return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {
+ // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+ if (text.size() == 0)
+ return false;
+ if (!Letter::InClass(text.at(0)))
+ return false;
+ if (!AllInClass<Alphanumeric>(text.substr(1)))
+ return false;
+ return true;
+}
+
} // namespace io
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 6b727d9f..2f07116e 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -229,6 +229,21 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Sets the comment style.
void set_comment_style(CommentStyle style) { comment_style_ = style; }
+ // Whether to require whitespace between a number and a field name.
+ // Default is true. Do not use this; for Google-internal cleanup only.
+ void set_require_space_after_number(bool require) {
+ require_space_after_number_ = require;
+ }
+
+ // Whether to allow string literals to span multiple lines. Default is false.
+ // Do not use this; for Google-internal cleanup only.
+ void set_allow_multiline_strings(bool allow) {
+ allow_multiline_strings_ = allow;
+ }
+
+ // External helper: validate an identifier.
+ static bool IsIdentifier(const string& text);
+
// -----------------------------------------------------------------
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
@@ -259,6 +274,8 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Options.
bool allow_f_after_float_;
CommentStyle comment_style_;
+ bool require_space_after_number_;
+ bool allow_multiline_strings_;
// Since we count columns we need to interpret tabs somehow. We'll take
// the standard 8-character definition for lack of any way to do better.
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index dbb5be4f..b39279b7 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -411,12 +411,6 @@ MultiTokenCase kMultiTokenCases[] = {
{ Tokenizer::TYPE_END , "" , 1, 3, 3 },
}},
- // Bytes with the high-order bit set should not be seen as control characters.
- { "\300", {
- { Tokenizer::TYPE_SYMBOL, "\300", 0, 0, 1 },
- { Tokenizer::TYPE_END , "" , 0, 1, 1 },
- }},
-
// Test all whitespace chars
{ "foo\n\t\r\v\fbar", {
{ Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
@@ -741,7 +735,7 @@ TEST_F(TokenizerTest, ParseInteger) {
EXPECT_EQ(0, ParseInteger("0x"));
uint64 i;
-#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
// Test invalid integers that will never be tokenized as integers.
EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
"passed text that could not have been tokenized as an integer");
@@ -753,7 +747,7 @@ TEST_F(TokenizerTest, ParseInteger) {
"passed text that could not have been tokenized as an integer");
EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
"passed text that could not have been tokenized as an integer");
-#endif // PROTOBUF_HASDEATH_TEST
+#endif // PROTOBUF_HAS_DEATH_TEST
// Test overflows.
EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
@@ -796,7 +790,7 @@ TEST_F(TokenizerTest, ParseFloat) {
EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
-#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
// Test invalid integers that will never be tokenized as integers.
EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
"passed text that could not have been tokenized as a float");
@@ -804,7 +798,7 @@ TEST_F(TokenizerTest, ParseFloat) {
"passed text that could not have been tokenized as a float");
EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
"passed text that could not have been tokenized as a float");
-#endif // PROTOBUF_HASDEATH_TEST
+#endif // PROTOBUF_HAS_DEATH_TEST
}
TEST_F(TokenizerTest, ParseString) {
@@ -843,10 +837,10 @@ TEST_F(TokenizerTest, ParseString) {
EXPECT_EQ("u0", output);
// Test invalid strings that will never be tokenized as strings.
-#ifdef PROTOBUF_HASDEATH_TEST // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
"passed text that could not have been tokenized as a string");
-#endif // PROTOBUF_HASDEATH_TEST
+#endif // PROTOBUF_HAS_DEATH_TEST
}
TEST_F(TokenizerTest, ParseStringAppend) {
@@ -883,7 +877,7 @@ ErrorCase kErrorCases[] = {
{ "'\\x' foo", true,
"0:3: Expected hex digits for escape sequence.\n" },
{ "'foo", false,
- "0:4: String literals cannot cross line boundaries.\n" },
+ "0:4: Unexpected end of string.\n" },
{ "'bar\nfoo", true,
"0:4: String literals cannot cross line boundaries.\n" },
{ "'\\u01' foo", true,
@@ -951,6 +945,10 @@ ErrorCase kErrorCases[] = {
"0:0: Invalid control characters encountered in text.\n" },
{ string("\0\0foo", 5), true,
"0:0: Invalid control characters encountered in text.\n" },
+
+ // Check error from high order bits set
+ { "\300foo", true,
+ "0:0: Interpreting non ascii codepoint 192.\n" },
};
TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
index dad6ff14..4d53f29b 100644
--- a/src/google/protobuf/io/zero_copy_stream.cc
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -34,6 +34,7 @@
#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/common.h>
namespace google {
namespace protobuf {
@@ -43,6 +44,14 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {}
ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
+bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */,
+ int /* size */) {
+ GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. "
+ "Reaching here usually means a ZeroCopyOutputStream "
+ "implementation bug.";
+ return false;
+}
+
} // namespace io
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
index db5326f7..f8921228 100644
--- a/src/google/protobuf/io/zero_copy_stream.h
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -226,6 +226,16 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
// Returns the total number of bytes written since this object was created.
virtual int64 ByteCount() const = 0;
+ // Write a given chunk of data to the output. Some output streams may
+ // implement this in a way that avoids copying. Check AllowsAliasing() before
+ // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is
+ // called on a stream that does not allow aliasing.
+ //
+ // NOTE: It is caller's responsibility to ensure that the chunk of memory
+ // remains live until all of the data has been consumed from the stream.
+ virtual bool WriteAliasedRaw(const void* data, int size);
+ virtual bool AllowsAliasing() const { return false; }
+
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
index 9fcbb622..7829a29f 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -413,7 +413,9 @@ int64 ConcatenatingInputStream::ByteCount() const {
LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
int64 limit)
- : input_(input), limit_(limit) {}
+ : input_(input), limit_(limit) {
+ prior_bytes_read_ = input_->ByteCount();
+}
LimitingInputStream::~LimitingInputStream() {
// If we overshot the limit, back up.
@@ -457,9 +459,9 @@ bool LimitingInputStream::Skip(int count) {
int64 LimitingInputStream::ByteCount() const {
if (limit_ < 0) {
- return input_->ByteCount() + limit_;
+ return input_->ByteCount() + limit_ - prior_bytes_read_;
} else {
- return input_->ByteCount();
+ return input_->ByteCount() - prior_bytes_read_;
}
}
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 9fedb005..83827097 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -344,6 +344,7 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
private:
ZeroCopyInputStream* input_;
int64 limit_; // Decreases as we go, becomes negative if we overshoot.
+ int64 prior_bytes_read_; // Bytes read on underlying stream at construction
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
};
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
index b3a71ce3..d186a98b 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -33,10 +33,12 @@
// Sanjay Ghemawat, Jeff Dean, and others.
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
-#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
#include <algorithm>
+#include <limits>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
namespace google {
namespace protobuf {
@@ -161,15 +163,23 @@ bool StringOutputStream::Next(void** data, int* size) {
// without a memory allocation this way.
STLStringResizeUninitialized(target_, target_->capacity());
} else {
- // Size has reached capacity, so double the size. Also make sure
- // that the new size is at least kMinimumSize.
+ // Size has reached capacity, try to double the size.
+ if (old_size > std::numeric_limits<int>::max() / 2) {
+ // Can not double the size otherwise it is going to cause integer
+ // overflow in the expression below: old_size * 2 ";
+ GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for "
+ << "StringOutputStream.";
+ return false;
+ }
+ // Double the size, also make sure that the new size is at least
+ // kMinimumSize.
STLStringResizeUninitialized(
target_,
max(old_size * 2,
kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness.
}
- *data = string_as_array(target_) + old_size;
+ *data = mutable_string_data(target_) + old_size;
*size = target_->size() - old_size;
return true;
}
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
index 153f543e..b980143e 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -48,6 +48,7 @@
#include <iosfwd>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
namespace google {
@@ -333,6 +334,19 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea
// ===================================================================
+// Return a pointer to mutable characters underlying the given string. The
+// return value is valid until the next time the string is resized. We
+// trust the caller to treat the return value as an array of length s->size().
+inline char* mutable_string_data(string* s) {
+#ifdef LANG_CXX11
+ // This should be simpler & faster than string_as_array() because the latter
+ // is guaranteed to return NULL when *s is empty, so it has to check for that.
+ return &(*s)[0];
+#else
+ return string_as_array(s);
+#endif
+}
+
} // namespace io
} // namespace protobuf
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index 6f155df7..75eb2a43 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -560,9 +560,10 @@ TEST_F(IoTest, CompressionOptions) {
// Some ad-hoc testing of compression options.
string golden;
- File::ReadFileToStringOrDie(
- TestSourceDir() + "/google/protobuf/testdata/golden_message",
- &golden);
+ GOOGLE_CHECK_OK(File::GetContents(
+ TestSourceDir() +
+ "/google/protobuf/testdata/golden_message",
+ &golden, true));
GzipOutputStream::Options options;
string gzip_compressed = Compress(golden, options);
@@ -923,6 +924,26 @@ TEST_F(IoTest, LimitingInputStream) {
ReadStuff(&input);
}
+// Checks that ByteCount works correctly for LimitingInputStreams where the
+// underlying stream has already been read.
+TEST_F(IoTest, LimitingInputStreamByteCount) {
+ const int kHalfBufferSize = 128;
+ const int kBufferSize = kHalfBufferSize * 2;
+ uint8 buffer[kBufferSize];
+
+ // Set up input. Only allow half to be read at once.
+ ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize);
+ const void* data;
+ int size;
+ EXPECT_TRUE(array_input.Next(&data, &size));
+ EXPECT_EQ(kHalfBufferSize, array_input.ByteCount());
+ // kHalfBufferSize - 1 to test limiting logic as well.
+ LimitingInputStream input(&array_input, kHalfBufferSize - 1);
+ EXPECT_EQ(0, input.ByteCount());
+ EXPECT_TRUE(input.Next(&data, &size));
+ EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount());
+}
+
// Check that a zero-size array doesn't confuse the code.
TEST(ZeroSizeArray, Input) {
ArrayInputStream input(NULL, 0);