aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/io/coded_stream.h
diff options
context:
space:
mode:
authorGravatar temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>2008-07-10 02:12:20 +0000
committerGravatar temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>2008-07-10 02:12:20 +0000
commit40ee551715c3a784ea6132dbf604b0e665ca2def (patch)
tree6e3ea9674be5b0f59106f88f3afa1313854beebf /src/google/protobuf/io/coded_stream.h
Initial checkin.
Diffstat (limited to 'src/google/protobuf/io/coded_stream.h')
-rw-r--r--src/google/protobuf/io/coded_stream.h592
1 files changed, 592 insertions, 0 deletions
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
new file mode 100644
index 00000000..91e5c56a
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.h
@@ -0,0 +1,592 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the CodedInputStream and CodedOutputStream classes,
+// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
+// and allow you to read or write individual pieces of data in various
+// formats. In particular, these implement the varint encoding for
+// integers, a simple variable-length encoding in which smaller numbers
+// take fewer bytes.
+//
+// Typically these classes will only be used internally by the protocol
+// buffer library in order to encode and decode protocol buffers. Clients
+// of the library only need to know about this class if they wish to write
+// custom message parsing or serialization procedures.
+//
+// CodedOutputStream example:
+// // Write some data to "myfile". First we write a 4-byte "magic number"
+// // to identify the file type, then write a length-delimited string. The
+// // string is composed of a varint giving the length followed by the raw
+// // bytes.
+// int fd = open("myfile", O_WRONLY);
+// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
+// CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+//
+// int magic_number = 1234;
+// char text[] = "Hello world!";
+// coded_output->WriteLittleEndian32(magic_number);
+// coded_output->WriteVarint32(strlen(text));
+// coded_output->WriteRaw(text, strlen(text));
+//
+// delete coded_output;
+// delete raw_output;
+// close(fd);
+//
+// CodedInputStream example:
+// // Read a file created by the above code.
+// int fd = open("myfile", O_RDONLY);
+// ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+// CodedInputStream coded_input = new CodedInputStream(raw_input);
+//
+// coded_input->ReadLittleEndian32(&magic_number);
+// if (magic_number != 1234) {
+// cerr << "File not in expected format." << endl;
+// return;
+// }
+//
+// uint32 size;
+// coded_input->ReadVarint32(&size);
+//
+// char* text = new char[size + 1];
+// coded_input->ReadRaw(buffer, size);
+// text[size] = '\0';
+//
+// delete coded_input;
+// delete raw_input;
+// close(fd);
+//
+// cout << "Text is: " << text << endl;
+// delete [] text;
+//
+// For those who are interested, varint encoding is defined as follows:
+//
+// The encoding operates on unsigned integers of up to 64 bits in length.
+// Each byte of the encoded value has the format:
+// * bits 0-6: Seven bits of the number being encoded.
+// * bit 7: Zero if this is the last byte in the encoding (in which
+// case all remaining bits of the number are zero) or 1 if
+// more bytes follow.
+// The first byte contains the least-significant 7 bits of the number, the
+// second byte (if present) contains the next-least-significant 7 bits,
+// and so on. So, the binary number 1011000101011 would be encoded in two
+// bytes as "10101011 00101100".
+//
+// In theory, varint could be used to encode integers of any length.
+// However, for practicality we set a limit at 64 bits. The maximum encoded
+// length of a number is thus 10 bytes.
+
+#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+namespace io {
+
+// Defined in this file.
+class CodedInputStream;
+class CodedOutputStream;
+
+// Defined in other files.
+class ZeroCopyInputStream; // zero_copy_stream.h
+class ZeroCopyOutputStream; // zero_copy_stream.h
+
+// Class which reads and decodes binary data which is composed of varint-
+// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
+// Most users will not need to deal with CodedInputStream.
+//
+// Most methods of CodedInputStream that return a bool return false if an
+// underlying I/O error occurs or if the data is malformed. Once such a
+// failure occurs, the CodedInputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedInputStream {
+ public:
+ // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
+ explicit CodedInputStream(ZeroCopyInputStream* input);
+
+ // Destroy the CodedInputStream and position the underlying
+ // ZeroCopyInputStream at the first unread byte. If an error occurred while
+ // reading (causing a method to return false), then the exact position of
+ // the input stream may be anywhere between the last value that was read
+ // successfully and the stream's byte limit.
+ ~CodedInputStream();
+
+
+ // Skips a number of bytes. Returns false if an underlying read error
+ // occurs.
+ bool Skip(int count);
+
+ // Read raw bytes, copying them into the given buffer.
+ bool ReadRaw(void* buffer, int size);
+
+ // Like ReadRaw, but reads into a string.
+ //
+ // Implementation Note: ReadString() grows the string gradually as it
+ // reads in the data, rather than allocating the entire requested size
+ // upfront. This prevents denial-of-service attacks in which a client
+ // could claim that a string is going to be MAX_INT bytes long in order to
+ // crash the server because it can't allocate this much space at once.
+ bool ReadString(string* buffer, int size);
+
+
+ // Read a 32-bit little-endian integer.
+ bool ReadLittleEndian32(uint32* value);
+ // Read a 64-bit little-endian integer.
+ bool ReadLittleEndian64(uint64* value);
+
+ // Read an unsigned integer with Varint encoding, truncating to 32 bits.
+ // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
+ // it to uint32, but may be more efficient.
+ bool ReadVarint32(uint32* value);
+ // Read an unsigned integer with Varint encoding.
+ bool ReadVarint64(uint64* value);
+
+ // Read a tag. This calls ReadVarint32() and returns the result, or returns
+ // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
+ // the last tag value, which can be checked with LastTagWas().
+ // Always inline because this is only called in once place per parse loop
+ // but it is called for every iteration of said loop, so it should be fast.
+ // GCC doesn't want to inline this by default.
+ uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Usually returns true if calling ReadVarint32() now would produce the given
+ // value. Will always return false if ReadVarint32() would not return the
+ // given value. If ExpectTag() returns true, it also advances past
+ // the varint. For best performance, use a compile-time constant as the
+ // parameter.
+ // Always inline because this collapses to a small number of instructions
+ // when given a constant parameter, but GCC doesn't want to inline by default.
+ bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Usually returns true if no more bytes can be read. Always returns false
+ // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
+ // call to LastTagWas() will act as if ReadTag() had been called and returned
+ // zero, and ConsumedEntireMessage() will return true.
+ bool ExpectAtEnd();
+
+ // If the last call to ReadTag() returned the given value, returns true.
+ // Otherwise, returns false;
+ //
+ // This is needed because parsers for some types of embedded messages
+ // (with field type TYPE_GROUP) don't actually know that they've reached the
+ // end of a message until they see an ENDGROUP tag, which was actually part
+ // of the enclosing message. The enclosing message would like to check that
+ // tag to make sure it had the right number, so it calls LastTagWas() on
+ // return from the embedded parser to check.
+ bool LastTagWas(uint32 expected);
+
+ // When parsing message (but NOT a group), this method must be called
+ // immediately after MergeFromCodedStream() returns (if it returns true)
+ // to further verify that the message ended in a legitimate way. For
+ // example, this verifies that parsing did not end on an end-group tag.
+ // It also checks for some cases where, due to optimizations,
+ // MergeFromCodedStream() can incorrectly return true.
+ bool ConsumedEntireMessage();
+
+ // Limits ----------------------------------------------------------
+ // Limits are used when parsing length-delimited embedded messages.
+ // After the message's length is read, PushLimit() is used to prevent
+ // the CodedInputStream from reading beyond that length. Once the
+ // embedded message has been parsed, PopLimit() is called to undo the
+ // limit.
+
+ // Opaque type used with PushLimit() and PopLimit(). Do not modify
+ // values of this type yourself. The only reason that this isn't a
+ // struct with private internals is for efficiency.
+ typedef int Limit;
+
+ // Places a limit on the number of bytes that the stream may read,
+ // starting from the current position. Once the stream hits this limit,
+ // it will act like the end of the input has been reached until PopLimit()
+ // is called.
+ //
+ // As the names imply, the stream conceptually has a stack of limits. The
+ // shortest limit on the stack is always enforced, even if it is not the
+ // top limit.
+ //
+ // The value returned by PushLimit() is opaque to the caller, and must
+ // be passed unchanged to the corresponding call to PopLimit().
+ Limit PushLimit(int byte_limit);
+
+ // Pops the last limit pushed by PushLimit(). The input must be the value
+ // returned by that call to PushLimit().
+ void PopLimit(Limit limit);
+
+ // Returns the number of bytes left until the nearest limit on the
+ // stack is hit, or -1 if no limits are in place.
+ int BytesUntilLimit();
+
+ // Total Bytes Limit -----------------------------------------------
+ // To prevent malicious users from sending excessively large messages
+ // and causing integer overflows or memory exhaustion, CodedInputStream
+ // imposes a hard limit on the total number of bytes it will read.
+
+ // Sets the maximum number of bytes that this CodedInputStream will read
+ // before refusing to continue. To prevent integer overflows in the
+ // protocol buffers implementation, as well as to prevent servers from
+ // allocating enormous amounts of memory to hold parsed messages, the
+ // maximum message length should be limited to the shortest length that
+ // will not harm usability. The theoretical shortest message that could
+ // cause integer overflows is 512MB. The default limit is 64MB. Apps
+ // should set shorter limits if possible. If warning_threshold is not -1,
+ // a warning will be printed to stderr after warning_threshold bytes are
+ // read. An error will always be printed to stderr if the limit is
+ // reached.
+ //
+ // This is unrelated to PushLimit()/PopLimit().
+ //
+ // Hint: If you are reading this because your program is printing a
+ // warning about dangerously large protocol messages, you may be
+ // confused about what to do next. The best option is to change your
+ // design such that excessively large messages are not necessary.
+ // For example, try to design file formats to consist of many small
+ // messages rather than a single large one. If this is infeasible,
+ // you will need to increase the limit. Chances are, though, that
+ // your code never constructs a CodedInputStream on which the limit
+ // can be set. You probably parse messages by calling things like
+ // Message::ParseFromString(). In this case, you will need to change
+ // your code to instead construct some sort of ZeroCopyInputStream
+ // (e.g. an ArrayInputStream), construct a CodedInputStream around
+ // that, then call Message::ParseFromCodedStream() instead. Then
+ // you can adjust the limit. Yes, it's more work, but you're doing
+ // something unusual.
+ void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
+
+ // Recursion Limit -------------------------------------------------
+ // To prevent corrupt or malicious messages from causing stack overflows,
+ // we must keep track of the depth of recursion when parsing embedded
+ // messages and groups. CodedInputStream keeps track of this because it
+ // is the only object that is passed down the stack during parsing.
+
+ // Sets the maximum recursion depth. The default is 64.
+ void SetRecursionLimit(int limit);
+
+ // Increments the current recursion depth. Returns true if the depth is
+ // under the limit, false if it has gone over.
+ bool IncrementRecursionDepth();
+
+ // Decrements the recursion depth.
+ void DecrementRecursionDepth();
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
+
+ ZeroCopyInputStream* input_;
+ const uint8* buffer_;
+ int buffer_size_; // size of current buffer
+ int total_bytes_read_; // total bytes read from input_, including
+ // the current buffer
+
+ // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
+ // so that we can BackUp() on destruction.
+ int overflow_bytes_;
+
+ // LastTagWas() stuff.
+ uint32 last_tag_; // result of last ReadTag().
+
+ // This is set true by ReadVarint32Fallback() if it is called when exactly
+ // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
+ // reach the end of a message and attempt to read another tag.
+ bool legitimate_message_end_;
+
+ // See EnableAliasing().
+ bool aliasing_enabled_;
+
+ // Limits
+ Limit current_limit_; // if position = -1, no limit is applied
+
+ // For simplicity, if the current buffer crosses a limit (either a normal
+ // limit created by PushLimit() or the total bytes limit), buffer_size_
+ // only tracks the number of bytes before that limit. This field
+ // contains the number of bytes after it. Note that this implies that if
+ // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
+ // hit a limit. However, if both are zero, it doesn't necessarily mean
+ // we aren't at a limit -- the buffer may have ended exactly at the limit.
+ int buffer_size_after_limit_;
+
+ // Maximum number of bytes to read, period. This is unrelated to
+ // current_limit_. Set using SetTotalBytesLimit().
+ int total_bytes_limit_;
+ int total_bytes_warning_threshold_;
+
+ // Current recursion depth, controlled by IncrementRecursionDepth() and
+ // DecrementRecursionDepth().
+ int recursion_depth_;
+ // Recursion depth limit, set by SetRecursionLimit().
+ int recursion_limit_;
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Recomputes the value of buffer_size_after_limit_. Must be called after
+ // current_limit_ or total_bytes_limit_ changes.
+ void RecomputeBufferLimits();
+
+ // Writes an error message saying that we hit total_bytes_limit_.
+ void PrintTotalBytesLimitError();
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(buffer_size_).
+ bool Refresh();
+
+ bool ReadVarint32Fallback(uint32* value);
+};
+
+// Class which encodes and writes binary data which is composed of varint-
+// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
+// Most users will not need to deal with CodedOutputStream.
+//
+// Most methods of CodedOutputStream which return a bool return false if an
+// underlying I/O error occurs. Once such a failure occurs, the
+// CodedOutputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedOutputStream {
+ public:
+ // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
+ explicit CodedOutputStream(ZeroCopyOutputStream* output);
+
+ // Destroy the CodedOutputStream and position the underlying
+ // ZeroCopyOutputStream immediately after the last byte written.
+ ~CodedOutputStream();
+
+ // Write raw bytes, copying them from the given buffer.
+ bool WriteRaw(const void* buffer, int size);
+
+ // Equivalent to WriteRaw(str.data(), str.size()).
+ bool WriteString(const string& str);
+
+
+ // Write a 32-bit little-endian integer.
+ bool WriteLittleEndian32(uint32 value);
+ // Write a 64-bit little-endian integer.
+ bool WriteLittleEndian64(uint64 value);
+
+ // Write an unsigned integer with Varint encoding. Writing a 32-bit value
+ // is equivalent to casting it to uint64 and writing it as a 64-bit value,
+ // but may be more efficient.
+ bool WriteVarint32(uint32 value);
+ // Write an unsigned integer with Varint encoding.
+ bool WriteVarint64(uint64 value);
+
+ // Equivalent to WriteVarint32() except when the value is negative,
+ // in which case it must be sign-extended to a full 10 bytes.
+ bool WriteVarint32SignExtended(int32 value);
+
+ // This is identical to WriteVarint32(), but optimized for writing tags.
+ // In particular, if the input is a compile-time constant, this method
+ // compiles down to a couple instructions.
+ // Always inline because otherwise the aformentioned optimization can't work,
+ // but GCC by default doesn't want to inline this.
+ bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize32(uint32 value);
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize64(uint64 value);
+
+ // If negative, 10 bytes. Otheriwse, same as VarintSize32().
+ static int VarintSize32SignExtended(int32 value);
+
+ // Returns the total number of bytes written since this object was created.
+ inline int ByteCount() const;
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
+
+ ZeroCopyOutputStream* output_;
+ uint8* buffer_;
+ int buffer_size_;
+ int total_bytes_; // Sum of sizes of all buffers seen so far.
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(buffer_size_).
+ bool Refresh();
+
+ bool WriteVarint32Fallback(uint32 value);
+ static int VarintSize32Fallback(uint32 value);
+};
+
+// inline methods ====================================================
+// The vast majority of varints are only one byte. These inline
+// methods optimize for that case.
+
+inline bool CodedInputStream::ReadVarint32(uint32* value) {
+ if (buffer_size_ != 0 && *buffer_ < 0x80) {
+ *value = *buffer_;
+ Advance(1);
+ return true;
+ } else {
+ return ReadVarint32Fallback(value);
+ }
+}
+
+inline uint32 CodedInputStream::ReadTag() {
+ if (buffer_size_ != 0 && buffer_[0] < 0x80) {
+ last_tag_ = buffer_[0];
+ Advance(1);
+ return last_tag_;
+ } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
+ last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
+ Advance(2);
+ return last_tag_;
+ } else if (ReadVarint32Fallback(&last_tag_)) {
+ return last_tag_;
+ } else {
+ last_tag_ = 0;
+ return 0;
+ }
+}
+
+inline bool CodedInputStream::LastTagWas(uint32 expected) {
+ return last_tag_ == expected;
+}
+
+inline bool CodedInputStream::ConsumedEntireMessage() {
+ return legitimate_message_end_;
+}
+
+inline bool CodedInputStream::ExpectTag(uint32 expected) {
+ if (expected < (1 << 7)) {
+ if (buffer_size_ != 0 && buffer_[0] == expected) {
+ Advance(1);
+ return true;
+ } else {
+ return false;
+ }
+ } else if (expected < (1 << 14)) {
+ if (buffer_size_ >= 2 &&
+ buffer_[0] == static_cast<uint8>(expected | 0x80) &&
+ buffer_[1] == static_cast<uint8>(expected >> 7)) {
+ Advance(2);
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ // Don't bother optimizing for larger values.
+ return false;
+ }
+}
+
+inline bool CodedInputStream::ExpectAtEnd() {
+ // If we are at a limit we know no more bytes can be read. Otherwise, it's
+ // hard to say without calling Refresh(), and we'd rather not do that.
+
+ if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
+ last_tag_ = 0; // Pretend we called ReadTag()...
+ legitimate_message_end_ = true; // ... and it hit EOF.
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline bool CodedOutputStream::WriteVarint32(uint32 value) {
+ if (value < 0x80 && buffer_size_ > 0) {
+ *buffer_ = value;
+ Advance(1);
+ return true;
+ } else {
+ return WriteVarint32Fallback(value);
+ }
+}
+
+inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) {
+ if (value < 0) {
+ return WriteVarint64(static_cast<uint64>(value));
+ } else {
+ return WriteVarint32(static_cast<uint32>(value));
+ }
+}
+
+inline bool CodedOutputStream::WriteTag(uint32 value) {
+ if (value < (1 << 7)) {
+ if (buffer_size_ != 0) {
+ buffer_[0] = value;
+ Advance(1);
+ return true;
+ }
+ } else if (value < (1 << 14)) {
+ if (buffer_size_ >= 2) {
+ buffer_[0] = static_cast<uint8>(value | 0x80);
+ buffer_[1] = static_cast<uint8>(value >> 7);
+ Advance(2);
+ return true;
+ }
+ }
+ return WriteVarint32Fallback(value);
+}
+
+inline int CodedOutputStream::VarintSize32(uint32 value) {
+ if (value < (1 << 7)) {
+ return 1;
+ } else {
+ return VarintSize32Fallback(value);
+ }
+}
+
+inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
+ if (value < 0) {
+ return 10; // TODO(kenton): Make this a symbolic constant.
+ } else {
+ return VarintSize32(static_cast<uint32>(value));
+ }
+}
+
+inline bool CodedOutputStream::WriteString(const string& str) {
+ return WriteRaw(str.data(), str.size());
+}
+
+inline int CodedOutputStream::ByteCount() const {
+ return total_bytes_ - buffer_size_;
+}
+
+inline void CodedInputStream::Advance(int amount) {
+ buffer_ += amount;
+ buffer_size_ -= amount;
+}
+
+inline void CodedOutputStream::Advance(int amount) {
+ buffer_ += amount;
+ buffer_size_ -= amount;
+}
+
+inline void CodedInputStream::SetRecursionLimit(int limit) {
+ recursion_limit_ = limit;
+}
+
+inline bool CodedInputStream::IncrementRecursionDepth() {
+ ++recursion_depth_;
+ return recursion_depth_ <= recursion_limit_;
+}
+
+inline void CodedInputStream::DecrementRecursionDepth() {
+ if (recursion_depth_ > 0) --recursion_depth_;
+}
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__