Initial checkin.

author: temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d> 2008-07-10 02:12:20 +0000
committer: temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d> 2008-07-10 02:12:20 +0000
commit: 40ee551715c3a784ea6132dbf604b0e665ca2def (patch)
tree: 6e3ea9674be5b0f59106f88f3afa1313854beebf /src/google/protobuf/io/coded_stream.h
1 files changed, 592 insertions, 0 deletions
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
new file mode 100644
index 00000000..91e5c56a
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.h
@@ -0,0 +1,592 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the CodedInputStream and CodedOutputStream classes,
+// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
+// and allow you to read or write individual pieces of data in various
+// formats.  In particular, these implement the varint encoding for
+// integers, a simple variable-length encoding in which smaller numbers
+// take fewer bytes.
+//
+// Typically these classes will only be used internally by the protocol
+// buffer library in order to encode and decode protocol buffers.  Clients
+// of the library only need to know about this class if they wish to write
+// custom message parsing or serialization procedures.
+//
+// CodedOutputStream example:
+//   // Write some data to "myfile".  First we write a 4-byte "magic number"
+//   // to identify the file type, then write a length-delimited string.  The
+//   // string is composed of a varint giving the length followed by the raw
+//   // bytes.
+//   int fd = open("myfile", O_WRONLY);
+//   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
+//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+//
+//   int magic_number = 1234;
+//   char text[] = "Hello world!";
+//   coded_output->WriteLittleEndian32(magic_number);
+//   coded_output->WriteVarint32(strlen(text));
+//   coded_output->WriteRaw(text, strlen(text));
+//
+//   delete coded_output;
+//   delete raw_output;
+//   close(fd);
+//
+// CodedInputStream example:
+//   // Read a file created by the above code.
+//   int fd = open("myfile", O_RDONLY);
+//   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+//   CodedInputStream coded_input = new CodedInputStream(raw_input);
+//
+//   coded_input->ReadLittleEndian32(&magic_number);
+//   if (magic_number != 1234) {
+//     cerr << "File not in expected format." << endl;
+//     return;
+//   }
+//
+//   uint32 size;
+//   coded_input->ReadVarint32(&size);
+//
+//   char* text = new char[size + 1];
+//   coded_input->ReadRaw(buffer, size);
+//   text[size] = '\0';
+//
+//   delete coded_input;
+//   delete raw_input;
+//   close(fd);
+//
+//   cout << "Text is: " << text << endl;
+//   delete [] text;
+//
+// For those who are interested, varint encoding is defined as follows:
+//
+// The encoding operates on unsigned integers of up to 64 bits in length.
+// Each byte of the encoded value has the format:
+// * bits 0-6: Seven bits of the number being encoded.
+// * bit 7: Zero if this is the last byte in the encoding (in which
+//   case all remaining bits of the number are zero) or 1 if
+//   more bytes follow.
+// The first byte contains the least-significant 7 bits of the number, the
+// second byte (if present) contains the next-least-significant 7 bits,
+// and so on.  So, the binary number 1011000101011 would be encoded in two
+// bytes as "10101011 00101100".
+//
+// In theory, varint could be used to encode integers of any length.
+// However, for practicality we set a limit at 64 bits.  The maximum encoded
+// length of a number is thus 10 bytes.
+
+#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+namespace io {
+
+// Defined in this file.
+class CodedInputStream;
+class CodedOutputStream;
+
+// Defined in other files.
+class ZeroCopyInputStream;           // zero_copy_stream.h
+class ZeroCopyOutputStream;          // zero_copy_stream.h
+
+// Class which reads and decodes binary data which is composed of varint-
+// encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
+// Most users will not need to deal with CodedInputStream.
+//
+// Most methods of CodedInputStream that return a bool return false if an
+// underlying I/O error occurs or if the data is malformed.  Once such a
+// failure occurs, the CodedInputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedInputStream {
+ public:
+  // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
+  explicit CodedInputStream(ZeroCopyInputStream* input);
+
+  // Destroy the CodedInputStream and position the underlying
+  // ZeroCopyInputStream at the first unread byte.  If an error occurred while
+  // reading (causing a method to return false), then the exact position of
+  // the input stream may be anywhere between the last value that was read
+  // successfully and the stream's byte limit.
+  ~CodedInputStream();
+
+
+  // Skips a number of bytes.  Returns false if an underlying read error
+  // occurs.
+  bool Skip(int count);
+
+  // Read raw bytes, copying them into the given buffer.
+  bool ReadRaw(void* buffer, int size);
+
+  // Like ReadRaw, but reads into a string.
+  //
+  // Implementation Note:  ReadString() grows the string gradually as it
+  // reads in the data, rather than allocating the entire requested size
+  // upfront.  This prevents denial-of-service attacks in which a client
+  // could claim that a string is going to be MAX_INT bytes long in order to
+  // crash the server because it can't allocate this much space at once.
+  bool ReadString(string* buffer, int size);
+
+
+  // Read a 32-bit little-endian integer.
+  bool ReadLittleEndian32(uint32* value);
+  // Read a 64-bit little-endian integer.
+  bool ReadLittleEndian64(uint64* value);
+
+  // Read an unsigned integer with Varint encoding, truncating to 32 bits.
+  // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
+  // it to uint32, but may be more efficient.
+  bool ReadVarint32(uint32* value);
+  // Read an unsigned integer with Varint encoding.
+  bool ReadVarint64(uint64* value);
+
+  // Read a tag.  This calls ReadVarint32() and returns the result, or returns
+  // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
+  // the last tag value, which can be checked with LastTagWas().
+  // Always inline because this is only called in once place per parse loop
+  // but it is called for every iteration of said loop, so it should be fast.
+  // GCC doesn't want to inline this by default.
+  uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+  // Usually returns true if calling ReadVarint32() now would produce the given
+  // value.  Will always return false if ReadVarint32() would not return the
+  // given value.  If ExpectTag() returns true, it also advances past
+  // the varint.  For best performance, use a compile-time constant as the
+  // parameter.
+  // Always inline because this collapses to a small number of instructions
+  // when given a constant parameter, but GCC doesn't want to inline by default.
+  bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+  // Usually returns true if no more bytes can be read.  Always returns false
+  // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
+  // call to LastTagWas() will act as if ReadTag() had been called and returned
+  // zero, and ConsumedEntireMessage() will return true.
+  bool ExpectAtEnd();
+
+  // If the last call to ReadTag() returned the given value, returns true.
+  // Otherwise, returns false;
+  //
+  // This is needed because parsers for some types of embedded messages
+  // (with field type TYPE_GROUP) don't actually know that they've reached the
+  // end of a message until they see an ENDGROUP tag, which was actually part
+  // of the enclosing message.  The enclosing message would like to check that
+  // tag to make sure it had the right number, so it calls LastTagWas() on
+  // return from the embedded parser to check.
+  bool LastTagWas(uint32 expected);
+
+  // When parsing message (but NOT a group), this method must be called
+  // immediately after MergeFromCodedStream() returns (if it returns true)
+  // to further verify that the message ended in a legitimate way.  For
+  // example, this verifies that parsing did not end on an end-group tag.
+  // It also checks for some cases where, due to optimizations,
+  // MergeFromCodedStream() can incorrectly return true.
+  bool ConsumedEntireMessage();
+
+  // Limits ----------------------------------------------------------
+  // Limits are used when parsing length-delimited embedded messages.
+  // After the message's length is read, PushLimit() is used to prevent
+  // the CodedInputStream from reading beyond that length.  Once the
+  // embedded message has been parsed, PopLimit() is called to undo the
+  // limit.
+
+  // Opaque type used with PushLimit() and PopLimit().  Do not modify
+  // values of this type yourself.  The only reason that this isn't a
+  // struct with private internals is for efficiency.
+  typedef int Limit;
+
+  // Places a limit on the number of bytes that the stream may read,
+  // starting from the current position.  Once the stream hits this limit,
+  // it will act like the end of the input has been reached until PopLimit()
+  // is called.
+  //
+  // As the names imply, the stream conceptually has a stack of limits.  The
+  // shortest limit on the stack is always enforced, even if it is not the
+  // top limit.
+  //
+  // The value returned by PushLimit() is opaque to the caller, and must
+  // be passed unchanged to the corresponding call to PopLimit().
+  Limit PushLimit(int byte_limit);
+
+  // Pops the last limit pushed by PushLimit().  The input must be the value
+  // returned by that call to PushLimit().
+  void PopLimit(Limit limit);
+
+  // Returns the number of bytes left until the nearest limit on the
+  // stack is hit, or -1 if no limits are in place.
+  int BytesUntilLimit();
+
+  // Total Bytes Limit -----------------------------------------------
+  // To prevent malicious users from sending excessively large messages
+  // and causing integer overflows or memory exhaustion, CodedInputStream
+  // imposes a hard limit on the total number of bytes it will read.
+
+  // Sets the maximum number of bytes that this CodedInputStream will read
+  // before refusing to continue.  To prevent integer overflows in the
+  // protocol buffers implementation, as well as to prevent servers from
+  // allocating enormous amounts of memory to hold parsed messages, the
+  // maximum message length should be limited to the shortest length that
+  // will not harm usability.  The theoretical shortest message that could
+  // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
+  // should set shorter limits if possible.  If warning_threshold is not -1,
+  // a warning will be printed to stderr after warning_threshold bytes are
+  // read.  An error will always be printed to stderr if the limit is
+  // reached.
+  //
+  // This is unrelated to PushLimit()/PopLimit().
+  //
+  // Hint:  If you are reading this because your program is printing a
+  //   warning about dangerously large protocol messages, you may be
+  //   confused about what to do next.  The best option is to change your
+  //   design such that excessively large messages are not necessary.
+  //   For example, try to design file formats to consist of many small
+  //   messages rather than a single large one.  If this is infeasible,
+  //   you will need to increase the limit.  Chances are, though, that
+  //   your code never constructs a CodedInputStream on which the limit
+  //   can be set.  You probably parse messages by calling things like
+  //   Message::ParseFromString().  In this case, you will need to change
+  //   your code to instead construct some sort of ZeroCopyInputStream
+  //   (e.g. an ArrayInputStream), construct a CodedInputStream around
+  //   that, then call Message::ParseFromCodedStream() instead.  Then
+  //   you can adjust the limit.  Yes, it's more work, but you're doing
+  //   something unusual.
+  void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
+
+  // Recursion Limit -------------------------------------------------
+  // To prevent corrupt or malicious messages from causing stack overflows,
+  // we must keep track of the depth of recursion when parsing embedded
+  // messages and groups.  CodedInputStream keeps track of this because it
+  // is the only object that is passed down the stack during parsing.
+
+  // Sets the maximum recursion depth.  The default is 64.
+  void SetRecursionLimit(int limit);
+
+  // Increments the current recursion depth.  Returns true if the depth is
+  // under the limit, false if it has gone over.
+  bool IncrementRecursionDepth();
+
+  // Decrements the recursion depth.
+  void DecrementRecursionDepth();
+
+ private:
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
+
+  ZeroCopyInputStream* input_;
+  const uint8* buffer_;
+  int buffer_size_;       // size of current buffer
+  int total_bytes_read_;  // total bytes read from input_, including
+                          // the current buffer
+
+  // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
+  // so that we can BackUp() on destruction.
+  int overflow_bytes_;
+
+  // LastTagWas() stuff.
+  uint32 last_tag_;         // result of last ReadTag().
+
+  // This is set true by ReadVarint32Fallback() if it is called when exactly
+  // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
+  // reach the end of a message and attempt to read another tag.
+  bool legitimate_message_end_;
+
+  // See EnableAliasing().
+  bool aliasing_enabled_;
+
+  // Limits
+  Limit current_limit_;   // if position = -1, no limit is applied
+
+  // For simplicity, if the current buffer crosses a limit (either a normal
+  // limit created by PushLimit() or the total bytes limit), buffer_size_
+  // only tracks the number of bytes before that limit.  This field
+  // contains the number of bytes after it.  Note that this implies that if
+  // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
+  // hit a limit.  However, if both are zero, it doesn't necessarily mean
+  // we aren't at a limit -- the buffer may have ended exactly at the limit.
+  int buffer_size_after_limit_;
+
+  // Maximum number of bytes to read, period.  This is unrelated to
+  // current_limit_.  Set using SetTotalBytesLimit().
+  int total_bytes_limit_;
+  int total_bytes_warning_threshold_;
+
+  // Current recursion depth, controlled by IncrementRecursionDepth() and
+  // DecrementRecursionDepth().
+  int recursion_depth_;
+  // Recursion depth limit, set by SetRecursionLimit().
+  int recursion_limit_;
+
+  // Advance the buffer by a given number of bytes.
+  void Advance(int amount);
+
+  // Recomputes the value of buffer_size_after_limit_.  Must be called after
+  // current_limit_ or total_bytes_limit_ changes.
+  void RecomputeBufferLimits();
+
+  // Writes an error message saying that we hit total_bytes_limit_.
+  void PrintTotalBytesLimitError();
+
+  // Called when the buffer runs out to request more data.  Implies an
+  // Advance(buffer_size_).
+  bool Refresh();
+
+  bool ReadVarint32Fallback(uint32* value);
+};
+
+// Class which encodes and writes binary data which is composed of varint-
+// encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
+// Most users will not need to deal with CodedOutputStream.
+//
+// Most methods of CodedOutputStream which return a bool return false if an
+// underlying I/O error occurs.  Once such a failure occurs, the
+// CodedOutputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedOutputStream {
+ public:
+  // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
+  explicit CodedOutputStream(ZeroCopyOutputStream* output);
+
+  // Destroy the CodedOutputStream and position the underlying
+  // ZeroCopyOutputStream immediately after the last byte written.
+  ~CodedOutputStream();
+
+  // Write raw bytes, copying them from the given buffer.
+  bool WriteRaw(const void* buffer, int size);
+
+  // Equivalent to WriteRaw(str.data(), str.size()).
+  bool WriteString(const string& str);
+
+
+  // Write a 32-bit little-endian integer.
+  bool WriteLittleEndian32(uint32 value);
+  // Write a 64-bit little-endian integer.
+  bool WriteLittleEndian64(uint64 value);
+
+  // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
+  // is equivalent to casting it to uint64 and writing it as a 64-bit value,
+  // but may be more efficient.
+  bool WriteVarint32(uint32 value);
+  // Write an unsigned integer with Varint encoding.
+  bool WriteVarint64(uint64 value);
+
+  // Equivalent to WriteVarint32() except when the value is negative,
+  // in which case it must be sign-extended to a full 10 bytes.
+  bool WriteVarint32SignExtended(int32 value);
+
+  // This is identical to WriteVarint32(), but optimized for writing tags.
+  // In particular, if the input is a compile-time constant, this method
+  // compiles down to a couple instructions.
+  // Always inline because otherwise the aformentioned optimization can't work,
+  // but GCC by default doesn't want to inline this.
+  bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+  // Returns the number of bytes needed to encode the given value as a varint.
+  static int VarintSize32(uint32 value);
+  // Returns the number of bytes needed to encode the given value as a varint.
+  static int VarintSize64(uint64 value);
+
+  // If negative, 10 bytes.  Otheriwse, same as VarintSize32().
+  static int VarintSize32SignExtended(int32 value);
+
+  // Returns the total number of bytes written since this object was created.
+  inline int ByteCount() const;
+
+ private:
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
+
+  ZeroCopyOutputStream* output_;
+  uint8* buffer_;
+  int buffer_size_;
+  int total_bytes_;  // Sum of sizes of all buffers seen so far.
+
+  // Advance the buffer by a given number of bytes.
+  void Advance(int amount);
+
+  // Called when the buffer runs out to request more data.  Implies an
+  // Advance(buffer_size_).
+  bool Refresh();
+
+  bool WriteVarint32Fallback(uint32 value);
+  static int VarintSize32Fallback(uint32 value);
+};
+
+// inline methods ====================================================
+// The vast majority of varints are only one byte.  These inline
+// methods optimize for that case.
+
+inline bool CodedInputStream::ReadVarint32(uint32* value) {
+  if (buffer_size_ != 0 && *buffer_ < 0x80) {
+    *value = *buffer_;
+    Advance(1);
+    return true;
+  } else {
+    return ReadVarint32Fallback(value);
+  }
+}
+
+inline uint32 CodedInputStream::ReadTag() {
+  if (buffer_size_ != 0 && buffer_[0] < 0x80) {
+    last_tag_ = buffer_[0];
+    Advance(1);
+    return last_tag_;
+  } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
+    last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
+    Advance(2);
+    return last_tag_;
+  } else if (ReadVarint32Fallback(&last_tag_)) {
+    return last_tag_;
+  } else {
+    last_tag_ = 0;
+    return 0;
+  }
+}
+
+inline bool CodedInputStream::LastTagWas(uint32 expected) {
+  return last_tag_ == expected;
+}
+
+inline bool CodedInputStream::ConsumedEntireMessage() {
+  return legitimate_message_end_;
+}
+
+inline bool CodedInputStream::ExpectTag(uint32 expected) {
+  if (expected < (1 << 7)) {
+    if (buffer_size_ != 0 && buffer_[0] == expected) {
+      Advance(1);
+      return true;
+    } else {
+      return false;
+    }
+  } else if (expected < (1 << 14)) {
+    if (buffer_size_ >= 2 &&
+        buffer_[0] == static_cast<uint8>(expected | 0x80) &&
+        buffer_[1] == static_cast<uint8>(expected >> 7)) {
+      Advance(2);
+      return true;
+    } else {
+      return false;
+    }
+  } else {
+    // Don't bother optimizing for larger values.
+    return false;
+  }
+}
+
+inline bool CodedInputStream::ExpectAtEnd() {
+  // If we are at a limit we know no more bytes can be read.  Otherwise, it's
+  // hard to say without calling Refresh(), and we'd rather not do that.
+
+  if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
+    last_tag_ = 0;                   // Pretend we called ReadTag()...
+    legitimate_message_end_ = true;  // ... and it hit EOF.
+    return true;
+  } else {
+    return false;
+  }
+}
+
+inline bool CodedOutputStream::WriteVarint32(uint32 value) {
+  if (value < 0x80 && buffer_size_ > 0) {
+    *buffer_ = value;
+    Advance(1);
+    return true;
+  } else {
+    return WriteVarint32Fallback(value);
+  }
+}
+
+inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) {
+  if (value < 0) {
+    return WriteVarint64(static_cast<uint64>(value));
+  } else {
+    return WriteVarint32(static_cast<uint32>(value));
+  }
+}
+
+inline bool CodedOutputStream::WriteTag(uint32 value) {
+  if (value < (1 << 7)) {
+    if (buffer_size_ != 0) {
+      buffer_[0] = value;
+      Advance(1);
+      return true;
+    }
+  } else if (value < (1 << 14)) {
+    if (buffer_size_ >= 2) {
+      buffer_[0] = static_cast<uint8>(value | 0x80);
+      buffer_[1] = static_cast<uint8>(value >> 7);
+      Advance(2);
+      return true;
+    }
+  }
+  return WriteVarint32Fallback(value);
+}
+
+inline int CodedOutputStream::VarintSize32(uint32 value) {
+  if (value < (1 << 7)) {
+    return 1;
+  } else  {
+    return VarintSize32Fallback(value);
+  }
+}
+
+inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
+  if (value < 0) {
+    return 10;     // TODO(kenton):  Make this a symbolic constant.
+  } else {
+    return VarintSize32(static_cast<uint32>(value));
+  }
+}
+
+inline bool CodedOutputStream::WriteString(const string& str) {
+  return WriteRaw(str.data(), str.size());
+}
+
+inline int CodedOutputStream::ByteCount() const {
+  return total_bytes_ - buffer_size_;
+}
+
+inline void CodedInputStream::Advance(int amount) {
+  buffer_ += amount;
+  buffer_size_ -= amount;
+}
+
+inline void CodedOutputStream::Advance(int amount) {
+  buffer_ += amount;
+  buffer_size_ -= amount;
+}
+
+inline void CodedInputStream::SetRecursionLimit(int limit) {
+  recursion_limit_ = limit;
+}
+
+inline bool CodedInputStream::IncrementRecursionDepth() {
+  ++recursion_depth_;
+  return recursion_depth_ <= recursion_limit_;
+}
+
+inline void CodedInputStream::DecrementRecursionDepth() {
+  if (recursion_depth_ > 0) --recursion_depth_;
+}
+
+}  // namespace io
+}  // namespace protobuf
+
+}  // namespace google
+#endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
author	temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>	2008-07-10 02:12:20 +0000
committer	temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>	2008-07-10 02:12:20 +0000
commit	40ee551715c3a784ea6132dbf604b0e665ca2def (patch)
tree	6e3ea9674be5b0f59106f88f3afa1313854beebf /src/google/protobuf/io/coded_stream.h