aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/wire_format.h
diff options
context:
space:
mode:
authorGravatar kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2009-07-29 01:13:20 +0000
committerGravatar kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2009-07-29 01:13:20 +0000
commit80b1d62bfcea65c59e2160da71dad84b1bd19cef (patch)
tree5423b830c53174fec83a7ea01ff0877e11c1ddb6 /src/google/protobuf/wire_format.h
parentd2fd0638c309113ccae3731a58e30419f522269a (diff)
Submit recent changes from internal branch, including "lite mode" for
C++ and Java. See CHANGES.txt for more details.
Diffstat (limited to 'src/google/protobuf/wire_format.h')
-rw-r--r--src/google/protobuf/wire_format.h436
1 files changed, 83 insertions, 353 deletions
diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h
index 963f427a..c7539250 100644
--- a/src/google/protobuf/wire_format.h
+++ b/src/google/protobuf/wire_format.h
@@ -40,17 +40,23 @@
#define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
#include <string>
-#include <google/protobuf/message.h>
+#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
+#include <google/protobuf/wire_format_lite.h>
-namespace google {
+// Do UTF-8 validation on string type in Debug build only
+#ifndef NDEBUG
+#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+#endif
+namespace google {
namespace protobuf {
namespace io {
class CodedInputStream; // coded_stream.h
class CodedOutputStream; // coded_stream.h
}
- class UnknownFieldSet; // unknown_field_set.h
+ class UnknownFieldSet; // unknown_field_set.h
}
namespace protobuf {
@@ -60,13 +66,26 @@ namespace internal {
// protocol-compiler-generated message classes. It must not be called
// directly by clients.
//
-// This class contains helpers for implementing the binary protocol buffer
-// wire format. These helpers are called primarily by generated code. The
-// class also contains reflection-based implementations of the wire format.
+// This class contains code for implementing the binary protocol buffer
+// wire format via reflection. The WireFormatLite class implements the
+// non-reflection based routines.
//
-// This class is really a namespace that contains only static methods.
+// This class is really a namespace that contains only static methods.
class LIBPROTOBUF_EXPORT WireFormat {
public:
+
+ // Given a field return its WireType
+ static inline WireFormatLite::WireType WireTypeForField(
+ const FieldDescriptor* field);
+
+ // Given a FieldDescriptor::Type return its WireType
+ static inline WireFormatLite::WireType WireTypeForFieldType(
+ FieldDescriptor::Type type);
+
+ // Compute the byte size of a tag. For groups, this includes both the start
+ // and end tags.
+ static inline int TagSize(int field_number, FieldDescriptor::Type type);
+
// These procedures can be used to implement the methods of Message which
// handle parsing and serialization of the protocol buffer wire format
// using only the Reflection interface. When you ask the protocol
@@ -152,41 +171,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
static int ComputeUnknownMessageSetItemsSize(
const UnknownFieldSet& unknown_fields);
- // -----------------------------------------------------------------
- // Helper constants and functions related to the format. These are
- // mostly meant for internal and generated code to use.
-
- // The wire format is composed of a sequence of tag/value pairs, each
- // of which contains the value of one field (or one element of a repeated
- // field). Each tag is encoded as a varint. The lower bits of the tag
- // identify its wire type, which specifies the format of the data to follow.
- // The rest of the bits contain the field number. Each type of field (as
- // declared by FieldDescriptor::Type, in descriptor.h) maps to one of
- // these wire types. Immediately following each tag is the field's value,
- // encoded in the format specified by the wire type. Because the tag
- // identifies the encoding of this data, it is possible to skip
- // unrecognized fields for forwards compatibility.
-
- enum WireType {
- WIRETYPE_VARINT = 0,
- WIRETYPE_FIXED64 = 1,
- WIRETYPE_LENGTH_DELIMITED = 2,
- WIRETYPE_START_GROUP = 3,
- WIRETYPE_END_GROUP = 4,
- WIRETYPE_FIXED32 = 5,
- };
-
- static inline WireType WireTypeForFieldType(FieldDescriptor::Type type) {
- return kWireTypeForFieldType[type];
- }
- // This is different from WireTypeForFieldType(field->type()) in the case of
- // packed repeated fields.
- static inline WireType WireTypeForField(const FieldDescriptor* field);
-
- // Number of bits in a tag which identify the wire type.
- static const int kTagTypeBits = 3;
- // Mask for those bits.
- static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1;
// Helper functions for encoding and decoding tags. (Inlined below and in
// _inl.h)
@@ -194,28 +178,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
// This is different from MakeTag(field->number(), field->type()) in the case
// of packed repeated fields.
static uint32 MakeTag(const FieldDescriptor* field);
- static uint32 MakeTag(int field_number, WireType type);
- static WireType GetTagWireType(uint32 tag);
- static int GetTagFieldNumber(uint32 tag);
-
- // Helper functions for converting between floats/doubles and IEEE-754
- // uint32s/uint64s so that they can be written. (Assumes your platform
- // uses IEEE-754 floats.)
- static uint32 EncodeFloat(float value);
- static float DecodeFloat(uint32 value);
- static uint64 EncodeDouble(double value);
- static double DecodeDouble(uint64 value);
-
- // Helper functions for mapping signed integers to unsigned integers in
- // such a way that numbers with small magnitudes will encode to smaller
- // varints. If you simply static_cast a negative number to an unsigned
- // number and varint-encode it, it will always take 10 bytes, defeating
- // the purpose of varint. So, for the "sint32" and "sint64" field types,
- // we ZigZag-encode the values.
- static uint32 ZigZagEncode32(int32 n);
- static int32 ZigZagDecode32(uint32 n);
- static uint64 ZigZagEncode64(int64 n);
- static int64 ZigZagDecode64(uint64 n);
// Parse a single field. The input should start out positioned immediately
// after the tag.
@@ -238,223 +200,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
const FieldDescriptor* field, // Cannot be NULL
const Message& message);
- // =================================================================
- // Methods for reading/writing individual field. The implementations
- // of these methods are defined in wire_format_inl.h; you must #include
- // that file to use these.
-
-// Avoid ugly line wrapping
-#define input io::CodedInputStream* input
-#define output io::CodedOutputStream* output
-#define field_number int field_number
-#define INL GOOGLE_ATTRIBUTE_ALWAYS_INLINE
-
- // Read fields, not including tags. The assumption is that you already
- // read the tag to determine what field to read.
- static inline bool ReadInt32 (input, int32* value);
- static inline bool ReadInt64 (input, int64* value);
- static inline bool ReadUInt32 (input, uint32* value);
- static inline bool ReadUInt64 (input, uint64* value);
- static inline bool ReadSInt32 (input, int32* value);
- static inline bool ReadSInt64 (input, int64* value);
- static inline bool ReadFixed32 (input, uint32* value);
- static inline bool ReadFixed64 (input, uint64* value);
- static inline bool ReadSFixed32(input, int32* value);
- static inline bool ReadSFixed64(input, int64* value);
- static inline bool ReadFloat (input, float* value);
- static inline bool ReadDouble (input, double* value);
- static inline bool ReadBool (input, bool* value);
- static inline bool ReadEnum (input, int* value);
-
- static inline bool ReadString(input, string* value);
- static inline bool ReadBytes (input, string* value);
-
- static inline bool ReadGroup (field_number, input, Message* value);
- static inline bool ReadMessage(input, Message* value);
-
- // Like above, but de-virtualize the call to MergePartialFromCodedStream().
- // The pointer must point at an instance of MessageType, *not* a subclass (or
- // the subclass must not override MergePartialFromCodedStream()).
- template<typename MessageType>
- static inline bool ReadGroupNoVirtual(field_number, input,
- MessageType* value);
- template<typename MessageType>
- static inline bool ReadMessageNoVirtual(input, MessageType* value);
-
- // Write a tag. The Write*() functions typically include the tag, so
- // normally there's no need to call this unless using the Write*NoTag()
- // variants.
- static inline void WriteTag(field_number, WireType type, output) INL;
-
- // Write fields, without tags.
- static inline void WriteInt32NoTag (int32 value, output) INL;
- static inline void WriteInt64NoTag (int64 value, output) INL;
- static inline void WriteUInt32NoTag (uint32 value, output) INL;
- static inline void WriteUInt64NoTag (uint64 value, output) INL;
- static inline void WriteSInt32NoTag (int32 value, output) INL;
- static inline void WriteSInt64NoTag (int64 value, output) INL;
- static inline void WriteFixed32NoTag (uint32 value, output) INL;
- static inline void WriteFixed64NoTag (uint64 value, output) INL;
- static inline void WriteSFixed32NoTag(int32 value, output) INL;
- static inline void WriteSFixed64NoTag(int64 value, output) INL;
- static inline void WriteFloatNoTag (float value, output) INL;
- static inline void WriteDoubleNoTag (double value, output) INL;
- static inline void WriteBoolNoTag (bool value, output) INL;
- static inline void WriteEnumNoTag (int value, output) INL;
-
- // Write fields, including tags.
- static inline void WriteInt32 (field_number, int32 value, output) INL;
- static inline void WriteInt64 (field_number, int64 value, output) INL;
- static inline void WriteUInt32 (field_number, uint32 value, output) INL;
- static inline void WriteUInt64 (field_number, uint64 value, output) INL;
- static inline void WriteSInt32 (field_number, int32 value, output) INL;
- static inline void WriteSInt64 (field_number, int64 value, output) INL;
- static inline void WriteFixed32 (field_number, uint32 value, output) INL;
- static inline void WriteFixed64 (field_number, uint64 value, output) INL;
- static inline void WriteSFixed32(field_number, int32 value, output) INL;
- static inline void WriteSFixed64(field_number, int64 value, output) INL;
- static inline void WriteFloat (field_number, float value, output) INL;
- static inline void WriteDouble (field_number, double value, output) INL;
- static inline void WriteBool (field_number, bool value, output) INL;
- static inline void WriteEnum (field_number, int value, output) INL;
-
- static inline void WriteString(field_number, const string& value, output) INL;
- static inline void WriteBytes (field_number, const string& value, output) INL;
-
- static inline void WriteGroup(field_number, const Message& value, output) INL;
- static inline void WriteMessage(
- field_number, const Message& value, output) INL;
-
- // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The
- // pointer must point at an instance of MessageType, *not* a subclass (or
- // the subclass must not override SerializeWithCachedSizes()).
- template<typename MessageType>
- static inline void WriteGroupNoVirtual(
- field_number, const MessageType& value, output) INL;
- template<typename MessageType>
- static inline void WriteMessageNoVirtual(
- field_number, const MessageType& value, output) INL;
-
-#undef output
-#define output uint8* target
-
- // Like above, but use only *ToArray methods of CodedOutputStream.
- static inline uint8* WriteTagToArray(field_number, WireType type, output) INL;
-
- // Write fields, without tags.
- static inline uint8* WriteInt32NoTagToArray (int32 value, output) INL;
- static inline uint8* WriteInt64NoTagToArray (int64 value, output) INL;
- static inline uint8* WriteUInt32NoTagToArray (uint32 value, output) INL;
- static inline uint8* WriteUInt64NoTagToArray (uint64 value, output) INL;
- static inline uint8* WriteSInt32NoTagToArray (int32 value, output) INL;
- static inline uint8* WriteSInt64NoTagToArray (int64 value, output) INL;
- static inline uint8* WriteFixed32NoTagToArray (uint32 value, output) INL;
- static inline uint8* WriteFixed64NoTagToArray (uint64 value, output) INL;
- static inline uint8* WriteSFixed32NoTagToArray(int32 value, output) INL;
- static inline uint8* WriteSFixed64NoTagToArray(int64 value, output) INL;
- static inline uint8* WriteFloatNoTagToArray (float value, output) INL;
- static inline uint8* WriteDoubleNoTagToArray (double value, output) INL;
- static inline uint8* WriteBoolNoTagToArray (bool value, output) INL;
- static inline uint8* WriteEnumNoTagToArray (int value, output) INL;
-
- // Write fields, including tags.
- static inline uint8* WriteInt32ToArray(
- field_number, int32 value, output) INL;
- static inline uint8* WriteInt64ToArray(
- field_number, int64 value, output) INL;
- static inline uint8* WriteUInt32ToArray(
- field_number, uint32 value, output) INL;
- static inline uint8* WriteUInt64ToArray(
- field_number, uint64 value, output) INL;
- static inline uint8* WriteSInt32ToArray(
- field_number, int32 value, output) INL;
- static inline uint8* WriteSInt64ToArray(
- field_number, int64 value, output) INL;
- static inline uint8* WriteFixed32ToArray(
- field_number, uint32 value, output) INL;
- static inline uint8* WriteFixed64ToArray(
- field_number, uint64 value, output) INL;
- static inline uint8* WriteSFixed32ToArray(
- field_number, int32 value, output) INL;
- static inline uint8* WriteSFixed64ToArray(
- field_number, int64 value, output) INL;
- static inline uint8* WriteFloatToArray(
- field_number, float value, output) INL;
- static inline uint8* WriteDoubleToArray(
- field_number, double value, output) INL;
- static inline uint8* WriteBoolToArray(
- field_number, bool value, output) INL;
- static inline uint8* WriteEnumToArray(
- field_number, int value, output) INL;
-
- static inline uint8* WriteStringToArray(
- field_number, const string& value, output) INL;
- static inline uint8* WriteBytesToArray(
- field_number, const string& value, output) INL;
-
- static inline uint8* WriteGroupToArray(
- field_number, const Message& value, output) INL;
- static inline uint8* WriteMessageToArray(
- field_number, const Message& value, output) INL;
-
- // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The
- // pointer must point at an instance of MessageType, *not* a subclass (or
- // the subclass must not override SerializeWithCachedSizes()).
- template<typename MessageType>
- static inline uint8* WriteGroupNoVirtualToArray(
- field_number, const MessageType& value, output) INL;
- template<typename MessageType>
- static inline uint8* WriteMessageNoVirtualToArray(
- field_number, const MessageType& value, output) INL;
-
-#undef output
-#undef input
-#undef INL
-
- // Compute the byte size of a tag. For groups, this includes both the start
- // and end tags.
- static inline int TagSize(field_number, FieldDescriptor::Type type);
-
-#undef field_number
-
- // Compute the byte size of a field. The XxSize() functions do NOT include
- // the tag, so you must also call TagSize(). (This is because, for repeated
- // fields, you should only call TagSize() once and multiply it by the element
- // count, but you may have to call XxSize() for each individual element.)
- static inline int Int32Size ( int32 value);
- static inline int Int64Size ( int64 value);
- static inline int UInt32Size (uint32 value);
- static inline int UInt64Size (uint64 value);
- static inline int SInt32Size ( int32 value);
- static inline int SInt64Size ( int64 value);
- static inline int EnumSize ( int value);
-
- // These types always have the same size.
- static const int kFixed32Size = 4;
- static const int kFixed64Size = 8;
- static const int kSFixed32Size = 4;
- static const int kSFixed64Size = 8;
- static const int kFloatSize = 4;
- static const int kDoubleSize = 8;
- static const int kBoolSize = 1;
-
- static inline int StringSize(const string& value);
- static inline int BytesSize (const string& value);
-
- static inline int GroupSize (const Message& value);
- static inline int MessageSize(const Message& value);
-
- // Like above, but de-virtualize the call to ByteSize(). The
- // pointer must point at an instance of MessageType, *not* a subclass (or
- // the subclass must not override ByteSize()).
- template<typename MessageType>
- static inline int GroupSizeNoVirtual (const MessageType& value);
- template<typename MessageType>
- static inline int MessageSizeNoVirtual(const MessageType& value);
-
- private:
- static const WireType kWireTypeForFieldType[];
-
// Parse/serialize a MessageSet::Item group. Used with messages that use
// option message_set_wire_format = true.
static bool ParseAndMergeMessageSetItem(
@@ -476,97 +221,82 @@ class LIBPROTOBUF_EXPORT WireFormat {
const FieldDescriptor* field, // Cannot be NULL
const Message& message);
- GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
-};
+ enum Operation {
+ PARSE,
+ SERIALIZE,
+ };
-// inline methods ====================================================
+ // Verifies that a string field is valid UTF8, logging an error if not.
+ // Does nothing unless GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED is defined.
+ static void VerifyUTF8String(const char* data, int size, Operation op);
-// This macro does the same thing as WireFormat::MakeTag(), but the
-// result is usable as a compile-time constant, which makes it usable
-// as a switch case or a template input. WireFormat::MakeTag() is more
-// type-safe, though, so prefer it if possible.
-#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE) \
- static_cast<uint32>( \
- ((FIELD_NUMBER) << ::google::protobuf::internal::WireFormat::kTagTypeBits) | (TYPE))
+ private:
+ // Verifies that a string field is valid UTF8, logging an error if not.
+ static void VerifyUTF8StringFallback(
+ const char* data,
+ int size,
+ Operation op);
-inline uint32 WireFormat::MakeTag(int field_number, WireType type) {
- return GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type);
-}
-inline WireFormat::WireType WireFormat::GetTagWireType(uint32 tag) {
- return static_cast<WireType>(tag & kTagTypeMask);
-}
-inline int WireFormat::GetTagFieldNumber(uint32 tag) {
- return static_cast<int>(tag >> kTagTypeBits);
-}
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
+};
-inline uint32 WireFormat::EncodeFloat(float value) {
- union {float f; uint32 i;};
- f = value;
- return i;
-}
+// Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
+class LIBPROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
+ public:
+ UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
+ : unknown_fields_(unknown_fields) {}
+ virtual ~UnknownFieldSetFieldSkipper() {}
-inline float WireFormat::DecodeFloat(uint32 value) {
- union {float f; uint32 i;};
- i = value;
- return f;
-}
+ // implements FieldSkipper -----------------------------------------
+ virtual bool SkipField(io::CodedInputStream* input, uint32 tag);
+ virtual bool SkipMessage(io::CodedInputStream* input);
+ virtual void SkipUnknownEnum(int field_number, int value);
-inline uint64 WireFormat::EncodeDouble(double value) {
- union {double f; uint64 i;};
- f = value;
- return i;
-}
+ private:
+ UnknownFieldSet* unknown_fields_;
+};
-inline double WireFormat::DecodeDouble(uint64 value) {
- union {double f; uint64 i;};
- i = value;
- return f;
-}
+// inline methods ====================================================
-// ZigZag Transform: Encodes signed integers so that they can be
-// effectively used with varint encoding.
-//
-// varint operates on unsigned integers, encoding smaller numbers into
-// fewer bytes. If you try to use it on a signed integer, it will treat
-// this number as a very large unsigned integer, which means that even
-// small signed numbers like -1 will take the maximum number of bytes
-// (10) to encode. ZigZagEncode() maps signed integers to unsigned
-// in such a way that those with a small absolute value will have smaller
-// encoded values, making them appropriate for encoding using varint.
-//
-// int32 -> uint32
-// -------------------------
-// 0 -> 0
-// -1 -> 1
-// 1 -> 2
-// -2 -> 3
-// ... -> ...
-// 2147483647 -> 4294967294
-// -2147483648 -> 4294967295
-//
-// >> encode >>
-// << decode <<
+inline WireFormatLite::WireType WireFormat::WireTypeForField(
+ const FieldDescriptor* field) {
+ if (field->options().packed()) {
+ return WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
+ } else {
+ return WireTypeForFieldType(field->type());
+ }
+}
-inline uint32 WireFormat::ZigZagEncode32(int32 n) {
- // Note: the right-shift must be arithmetic
- return (n << 1) ^ (n >> 31);
+inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
+ FieldDescriptor::Type type) {
+ // Some compilers don't like enum -> enum casts, so we implicit_cast to
+ // int first.
+ return WireFormatLite::WireTypeForFieldType(
+ static_cast<WireFormatLite::FieldType>(
+ implicit_cast<int>(type)));
}
-inline int32 WireFormat::ZigZagDecode32(uint32 n) {
- return (n >> 1) ^ -static_cast<int32>(n & 1);
+inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) {
+ return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
}
-inline uint64 WireFormat::ZigZagEncode64(int64 n) {
- // Note: the right-shift must be arithmetic
- return (n << 1) ^ (n >> 63);
+inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
+ // Some compilers don't like enum -> enum casts, so we implicit_cast to
+ // int first.
+ return WireFormatLite::TagSize(field_number,
+ static_cast<WireFormatLite::FieldType>(
+ implicit_cast<int>(type)));
}
-inline int64 WireFormat::ZigZagDecode64(uint64 n) {
- return (n >> 1) ^ -static_cast<int64>(n & 1);
+inline void WireFormat::VerifyUTF8String(const char* data, int size,
+ WireFormat::Operation op) {
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+ WireFormat::VerifyUTF8StringFallback(data, size, op);
+#endif
}
+
} // namespace internal
} // namespace protobuf