From 4de8f55113007fdc8e34107950e605fc0209d465 Mon Sep 17 00:00:00 2001 From: "jieluo@google.com" Date: Fri, 18 Jul 2014 00:47:59 +0000 Subject: down integrate to svn --- src/google/protobuf/io/coded_stream.h | 94 +++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 5 deletions(-) (limited to 'src/google/protobuf/io/coded_stream.h') diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h index 66cbee00..50a03a11 100644 --- a/src/google/protobuf/io/coded_stream.h +++ b/src/google/protobuf/io/coded_stream.h @@ -233,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Read a tag. This calls ReadVarint32() and returns the result, or returns // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates // the last tag value, which can be checked with LastTagWas(). - // Always inline because this is only called in once place per parse loop + // Always inline because this is only called in one place per parse loop // but it is called for every iteration of said loop, so it should be fast. // GCC doesn't want to inline this by default. uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // This is usually a faster alternative to ReadTag() when cutoff is a manifest + // constant. It does particularly well for cutoff >= 127. The first part + // of the return value is the tag that was read, though it can also be 0 in + // the cases where ReadTag() would return 0. If the second part is true + // then the tag is known to be in [0, cutoff]. If not, the tag either is + // above cutoff or is 0. (There's intentional wiggle room when tag is 0, + // because that can arise in several ways, and for best performance we want + // to avoid an extra "is tag == 0?" check here.) + inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) + GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + // Usually returns true if calling ReadVarint32() now would produce the given // value.
Will always return false if ReadVarint32() would not return the // given value. If ExpectTag() returns true, it also advances past @@ -264,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // zero, and ConsumedEntireMessage() will return true. bool ExpectAtEnd(); - // If the last call to ReadTag() returned the given value, returns true. - // Otherwise, returns false; + // If the last call to ReadTag() or ReadTagWithCutoff() returned the + // given value, returns true. Otherwise, returns false; // // This is needed because parsers for some types of embedded messages // (with field type TYPE_GROUP) don't actually know that they've reached the @@ -333,7 +344,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // cause integer overflows is 512MB. The default limit is 64MB. Apps // should set shorter limits if possible. If warning_threshold is not -1, // a warning will be printed to stderr after warning_threshold bytes are - // read. For backwards compatibility all negative values get squached to -1, + // read. For backwards compatibility all negative values get squashed to -1, // as other negative values might have special internal meanings. // An error will always be printed to stderr if the limit is reached. // @@ -356,6 +367,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // something unusual. void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); + // The Total Bytes Limit minus the Current Position, or -1 if there + // is no Total Bytes Limit. + int BytesUntilTotalBytesLimit() const; + // Recursion Limit ------------------------------------------------- // To prevent corrupt or malicious messages from causing stack overflows, // we must keep track of the depth of recursion when parsing embedded @@ -466,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { int overflow_bytes_; // LastTagWas() stuff. - uint32 last_tag_; // result of last ReadTag(). + uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). 
// This is set true by ReadTag{Fallback/Slow}() if it is called when exactly // at EOF, or by ExpectAtEnd() when it returns true. This happens when we @@ -638,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Write raw bytes, copying them from the given buffer. void WriteRaw(const void* buffer, int size); + // Like WriteRaw() but will try to write aliased data if aliasing is + // turned on. + void WriteRawMaybeAliased(const void* data, int size); // Like WriteRaw() but writing directly to the target array. // This is _not_ inlined, as the compiler often optimizes memcpy into inline // copy loops. Since this gets called by every field with string or bytes @@ -649,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { void WriteString(const string& str); // Like WriteString() but writing directly to the target array. static uint8* WriteStringToArray(const string& str, uint8* target); + // Write the varint-encoded size of str followed by str. + static uint8* WriteStringWithSizeToArray(const string& str, uint8* target); + // Instructs the CodedOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // effect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is the caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled); + // Write a 32-bit little-endian integer. void WriteLittleEndian32(uint32 value); // Like WriteLittleEndian32() but writing directly to the target array. @@ -725,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { int buffer_size_; int total_bytes_; // Sum of sizes of all buffers seen so far. bool had_error_; // Whether an error occurred during output.
+ bool aliasing_enabled_; // See EnableAliasing(). // Advance the buffer by a given number of bytes. void Advance(int amount); @@ -733,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Advance(buffer_size_). bool Refresh(); + // Like WriteRaw() but may avoid copying if the underlying + // ZeroCopyOutputStream supports it. + void WriteAliasedRaw(const void* buffer, int size); + static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target); // Always-inlined versions of WriteVarint* functions so that code can be @@ -850,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() { } } +inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff( + uint32 cutoff) { + // In performance-sensitive code we can expect cutoff to be a compile-time + // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at + // compile time. + if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). + // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields + // is large enough then is it better to check for the two-byte case first? + if (static_cast<int8>(buffer_[0]) > 0) { + const uint32 kMax1ByteVarint = 0x7f; + uint32 tag = last_tag_ = buffer_[0]; + Advance(1); + return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff); + } + // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, + // and tag is two bytes. The latter is tested by bitwise-and-not of the + // first byte and the second byte. + if (cutoff >= 0x80 && + GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && + GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { + const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f; + uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); + Advance(2); + // It might make sense to test for tag == 0 now, but it is so rare that + // we don't bother. A varint-encoded 0 should be one byte unless + // the encoder lost its mind.
The second part of the return value of + // this function is allowed to be either true or false if the tag is 0, + // so we don't have to check for tag == 0. We may need to check whether + // it exceeds cutoff. + bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff; + return make_pair(tag, at_or_below_cutoff); + } + } + // Slow path + last_tag_ = ReadTagFallback(); + return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff); +} + inline bool CodedInputStream::LastTagWas(uint32 expected) { return last_tag_ == expected; } @@ -1029,6 +1104,15 @@ inline void CodedOutputStream::WriteString(const string& str) { WriteRaw(str.data(), static_cast<int>(str.size())); } +inline void CodedOutputStream::WriteRawMaybeAliased( + const void* data, int size) { + if (aliasing_enabled_) { + WriteAliasedRaw(data, size); + } else { + WriteRaw(data, size); + } +} + inline uint8* CodedOutputStream::WriteStringToArray( const string& str, uint8* target) { return WriteRawToArray(str.data(), static_cast<int>(str.size()), target); -- cgit v1.2.3