From 80b1d62bfcea65c59e2160da71dad84b1bd19cef Mon Sep 17 00:00:00 2001 From: "kenton@google.com" Date: Wed, 29 Jul 2009 01:13:20 +0000 Subject: Submit recent changes from internal branch, including "lite mode" for C++ and Java. See CHANGES.txt for more details. --- src/google/protobuf/wire_format.h | 436 ++++++++------------------------------ 1 file changed, 83 insertions(+), 353 deletions(-) (limited to 'src/google/protobuf/wire_format.h') diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h index 963f427a..c7539250 100644 --- a/src/google/protobuf/wire_format.h +++ b/src/google/protobuf/wire_format.h @@ -40,17 +40,23 @@ #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__ #include -#include +#include #include +#include +#include -namespace google { +// Do UTF-8 validation on string type in Debug build only +#ifndef NDEBUG +#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +#endif +namespace google { namespace protobuf { namespace io { class CodedInputStream; // coded_stream.h class CodedOutputStream; // coded_stream.h } - class UnknownFieldSet; // unknown_field_set.h + class UnknownFieldSet; // unknown_field_set.h } namespace protobuf { @@ -60,13 +66,26 @@ namespace internal { // protocol-complier-generated message classes. It must not be called // directly by clients. // -// This class contains helpers for implementing the binary protocol buffer -// wire format. These helpers are called primarily by generated code. The -// class also contains reflection-based implementations of the wire format. +// This class contains code for implementing the binary protocol buffer +// wire format via reflection. The WireFormatLite class implements the +// non-reflection based routines. // -// This class is really a namespace that contains only static methods. 
+// This class is really a namespace that contains only static methods class LIBPROTOBUF_EXPORT WireFormat { public: + + // Given a field return its WireType + static inline WireFormatLite::WireType WireTypeForField( + const FieldDescriptor* field); + + // Given a FieldDescriptor::Type return its WireType + static inline WireFormatLite::WireType WireTypeForFieldType( + FieldDescriptor::Type type); + + // Compute the byte size of a tag. For groups, this includes both the start + // and end tags. + static inline int TagSize(int field_number, FieldDescriptor::Type type); + // These procedures can be used to implement the methods of Message which // handle parsing and serialization of the protocol buffer wire format // using only the Reflection interface. When you ask the protocol @@ -152,41 +171,6 @@ class LIBPROTOBUF_EXPORT WireFormat { static int ComputeUnknownMessageSetItemsSize( const UnknownFieldSet& unknown_fields); - // ----------------------------------------------------------------- - // Helper constants and functions related to the format. These are - // mostly meant for internal and generated code to use. - - // The wire format is composed of a sequence of tag/value pairs, each - // of which contains the value of one field (or one element of a repeated - // field). Each tag is encoded as a varint. The lower bits of the tag - // identify its wire type, which specifies the format of the data to follow. - // The rest of the bits contain the field number. Each type of field (as - // declared by FieldDescriptor::Type, in descriptor.h) maps to one of - // these wire types. Immediately following each tag is the field's value, - // encoded in the format specified by the wire type. Because the tag - // identifies the encoding of this data, it is possible to skip - // unrecognized fields for forwards compatibility. 
- - enum WireType { - WIRETYPE_VARINT = 0, - WIRETYPE_FIXED64 = 1, - WIRETYPE_LENGTH_DELIMITED = 2, - WIRETYPE_START_GROUP = 3, - WIRETYPE_END_GROUP = 4, - WIRETYPE_FIXED32 = 5, - }; - - static inline WireType WireTypeForFieldType(FieldDescriptor::Type type) { - return kWireTypeForFieldType[type]; - } - // This is different from WireTypeForFieldType(field->type()) in the case of - // packed repeated fields. - static inline WireType WireTypeForField(const FieldDescriptor* field); - - // Number of bits in a tag which identify the wire type. - static const int kTagTypeBits = 3; - // Mask for those bits. - static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1; // Helper functions for encoding and decoding tags. (Inlined below and in // _inl.h) @@ -194,28 +178,6 @@ class LIBPROTOBUF_EXPORT WireFormat { // This is different from MakeTag(field->number(), field->type()) in the case // of packed repeated fields. static uint32 MakeTag(const FieldDescriptor* field); - static uint32 MakeTag(int field_number, WireType type); - static WireType GetTagWireType(uint32 tag); - static int GetTagFieldNumber(uint32 tag); - - // Helper functions for converting between floats/doubles and IEEE-754 - // uint32s/uint64s so that they can be written. (Assumes your platform - // uses IEEE-754 floats.) - static uint32 EncodeFloat(float value); - static float DecodeFloat(uint32 value); - static uint64 EncodeDouble(double value); - static double DecodeDouble(uint64 value); - - // Helper functions for mapping signed integers to unsigned integers in - // such a way that numbers with small magnitudes will encode to smaller - // varints. If you simply static_cast a negative number to an unsigned - // number and varint-encode it, it will always take 10 bytes, defeating - // the purpose of varint. So, for the "sint32" and "sint64" field types, - // we ZigZag-encode the values. 
- static uint32 ZigZagEncode32(int32 n); - static int32 ZigZagDecode32(uint32 n); - static uint64 ZigZagEncode64(int64 n); - static int64 ZigZagDecode64(uint64 n); // Parse a single field. The input should start out positioned immidately // after the tag. @@ -238,223 +200,6 @@ class LIBPROTOBUF_EXPORT WireFormat { const FieldDescriptor* field, // Cannot be NULL const Message& message); - // ================================================================= - // Methods for reading/writing individual field. The implementations - // of these methods are defined in wire_format_inl.h; you must #include - // that file to use these. - -// Avoid ugly line wrapping -#define input io::CodedInputStream* input -#define output io::CodedOutputStream* output -#define field_number int field_number -#define INL GOOGLE_ATTRIBUTE_ALWAYS_INLINE - - // Read fields, not including tags. The assumption is that you already - // read the tag to determine what field to read. - static inline bool ReadInt32 (input, int32* value); - static inline bool ReadInt64 (input, int64* value); - static inline bool ReadUInt32 (input, uint32* value); - static inline bool ReadUInt64 (input, uint64* value); - static inline bool ReadSInt32 (input, int32* value); - static inline bool ReadSInt64 (input, int64* value); - static inline bool ReadFixed32 (input, uint32* value); - static inline bool ReadFixed64 (input, uint64* value); - static inline bool ReadSFixed32(input, int32* value); - static inline bool ReadSFixed64(input, int64* value); - static inline bool ReadFloat (input, float* value); - static inline bool ReadDouble (input, double* value); - static inline bool ReadBool (input, bool* value); - static inline bool ReadEnum (input, int* value); - - static inline bool ReadString(input, string* value); - static inline bool ReadBytes (input, string* value); - - static inline bool ReadGroup (field_number, input, Message* value); - static inline bool ReadMessage(input, Message* value); - - // Like above, but 
de-virtualize the call to MergePartialFromCodedStream(). - // The pointer must point at an instance of MessageType, *not* a subclass (or - // the subclass must not override MergePartialFromCodedStream()). - template - static inline bool ReadGroupNoVirtual(field_number, input, - MessageType* value); - template - static inline bool ReadMessageNoVirtual(input, MessageType* value); - - // Write a tag. The Write*() functions typically include the tag, so - // normally there's no need to call this unless using the Write*NoTag() - // variants. - static inline void WriteTag(field_number, WireType type, output) INL; - - // Write fields, without tags. - static inline void WriteInt32NoTag (int32 value, output) INL; - static inline void WriteInt64NoTag (int64 value, output) INL; - static inline void WriteUInt32NoTag (uint32 value, output) INL; - static inline void WriteUInt64NoTag (uint64 value, output) INL; - static inline void WriteSInt32NoTag (int32 value, output) INL; - static inline void WriteSInt64NoTag (int64 value, output) INL; - static inline void WriteFixed32NoTag (uint32 value, output) INL; - static inline void WriteFixed64NoTag (uint64 value, output) INL; - static inline void WriteSFixed32NoTag(int32 value, output) INL; - static inline void WriteSFixed64NoTag(int64 value, output) INL; - static inline void WriteFloatNoTag (float value, output) INL; - static inline void WriteDoubleNoTag (double value, output) INL; - static inline void WriteBoolNoTag (bool value, output) INL; - static inline void WriteEnumNoTag (int value, output) INL; - - // Write fields, including tags. 
- static inline void WriteInt32 (field_number, int32 value, output) INL; - static inline void WriteInt64 (field_number, int64 value, output) INL; - static inline void WriteUInt32 (field_number, uint32 value, output) INL; - static inline void WriteUInt64 (field_number, uint64 value, output) INL; - static inline void WriteSInt32 (field_number, int32 value, output) INL; - static inline void WriteSInt64 (field_number, int64 value, output) INL; - static inline void WriteFixed32 (field_number, uint32 value, output) INL; - static inline void WriteFixed64 (field_number, uint64 value, output) INL; - static inline void WriteSFixed32(field_number, int32 value, output) INL; - static inline void WriteSFixed64(field_number, int64 value, output) INL; - static inline void WriteFloat (field_number, float value, output) INL; - static inline void WriteDouble (field_number, double value, output) INL; - static inline void WriteBool (field_number, bool value, output) INL; - static inline void WriteEnum (field_number, int value, output) INL; - - static inline void WriteString(field_number, const string& value, output) INL; - static inline void WriteBytes (field_number, const string& value, output) INL; - - static inline void WriteGroup(field_number, const Message& value, output) INL; - static inline void WriteMessage( - field_number, const Message& value, output) INL; - - // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The - // pointer must point at an instance of MessageType, *not* a subclass (or - // the subclass must not override SerializeWithCachedSizes()). - template - static inline void WriteGroupNoVirtual( - field_number, const MessageType& value, output) INL; - template - static inline void WriteMessageNoVirtual( - field_number, const MessageType& value, output) INL; - -#undef output -#define output uint8* target - - // Like above, but use only *ToArray methods of CodedOutputStream. 
- static inline uint8* WriteTagToArray(field_number, WireType type, output) INL; - - // Write fields, without tags. - static inline uint8* WriteInt32NoTagToArray (int32 value, output) INL; - static inline uint8* WriteInt64NoTagToArray (int64 value, output) INL; - static inline uint8* WriteUInt32NoTagToArray (uint32 value, output) INL; - static inline uint8* WriteUInt64NoTagToArray (uint64 value, output) INL; - static inline uint8* WriteSInt32NoTagToArray (int32 value, output) INL; - static inline uint8* WriteSInt64NoTagToArray (int64 value, output) INL; - static inline uint8* WriteFixed32NoTagToArray (uint32 value, output) INL; - static inline uint8* WriteFixed64NoTagToArray (uint64 value, output) INL; - static inline uint8* WriteSFixed32NoTagToArray(int32 value, output) INL; - static inline uint8* WriteSFixed64NoTagToArray(int64 value, output) INL; - static inline uint8* WriteFloatNoTagToArray (float value, output) INL; - static inline uint8* WriteDoubleNoTagToArray (double value, output) INL; - static inline uint8* WriteBoolNoTagToArray (bool value, output) INL; - static inline uint8* WriteEnumNoTagToArray (int value, output) INL; - - // Write fields, including tags. 
- static inline uint8* WriteInt32ToArray( - field_number, int32 value, output) INL; - static inline uint8* WriteInt64ToArray( - field_number, int64 value, output) INL; - static inline uint8* WriteUInt32ToArray( - field_number, uint32 value, output) INL; - static inline uint8* WriteUInt64ToArray( - field_number, uint64 value, output) INL; - static inline uint8* WriteSInt32ToArray( - field_number, int32 value, output) INL; - static inline uint8* WriteSInt64ToArray( - field_number, int64 value, output) INL; - static inline uint8* WriteFixed32ToArray( - field_number, uint32 value, output) INL; - static inline uint8* WriteFixed64ToArray( - field_number, uint64 value, output) INL; - static inline uint8* WriteSFixed32ToArray( - field_number, int32 value, output) INL; - static inline uint8* WriteSFixed64ToArray( - field_number, int64 value, output) INL; - static inline uint8* WriteFloatToArray( - field_number, float value, output) INL; - static inline uint8* WriteDoubleToArray( - field_number, double value, output) INL; - static inline uint8* WriteBoolToArray( - field_number, bool value, output) INL; - static inline uint8* WriteEnumToArray( - field_number, int value, output) INL; - - static inline uint8* WriteStringToArray( - field_number, const string& value, output) INL; - static inline uint8* WriteBytesToArray( - field_number, const string& value, output) INL; - - static inline uint8* WriteGroupToArray( - field_number, const Message& value, output) INL; - static inline uint8* WriteMessageToArray( - field_number, const Message& value, output) INL; - - // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The - // pointer must point at an instance of MessageType, *not* a subclass (or - // the subclass must not override SerializeWithCachedSizes()). 
- template - static inline uint8* WriteGroupNoVirtualToArray( - field_number, const MessageType& value, output) INL; - template - static inline uint8* WriteMessageNoVirtualToArray( - field_number, const MessageType& value, output) INL; - -#undef output -#undef input -#undef INL - - // Compute the byte size of a tag. For groups, this includes both the start - // and end tags. - static inline int TagSize(field_number, FieldDescriptor::Type type); - -#undef field_number - - // Compute the byte size of a field. The XxSize() functions do NOT include - // the tag, so you must also call TagSize(). (This is because, for repeated - // fields, you should only call TagSize() once and multiply it by the element - // count, but you may have to call XxSize() for each individual element.) - static inline int Int32Size ( int32 value); - static inline int Int64Size ( int64 value); - static inline int UInt32Size (uint32 value); - static inline int UInt64Size (uint64 value); - static inline int SInt32Size ( int32 value); - static inline int SInt64Size ( int64 value); - static inline int EnumSize ( int value); - - // These types always have the same size. - static const int kFixed32Size = 4; - static const int kFixed64Size = 8; - static const int kSFixed32Size = 4; - static const int kSFixed64Size = 8; - static const int kFloatSize = 4; - static const int kDoubleSize = 8; - static const int kBoolSize = 1; - - static inline int StringSize(const string& value); - static inline int BytesSize (const string& value); - - static inline int GroupSize (const Message& value); - static inline int MessageSize(const Message& value); - - // Like above, but de-virtualize the call to ByteSize(). The - // pointer must point at an instance of MessageType, *not* a subclass (or - // the subclass must not override ByteSize()). 
- template - static inline int GroupSizeNoVirtual (const MessageType& value); - template - static inline int MessageSizeNoVirtual(const MessageType& value); - - private: - static const WireType kWireTypeForFieldType[]; - // Parse/serialize a MessageSet::Item group. Used with messages that use // opion message_set_wire_format = true. static bool ParseAndMergeMessageSetItem( @@ -476,97 +221,82 @@ class LIBPROTOBUF_EXPORT WireFormat { const FieldDescriptor* field, // Cannot be NULL const Message& message); - GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat); -}; + enum Operation { + PARSE, + SERIALIZE, + }; -// inline methods ==================================================== + // Verifies that a string field is valid UTF8, logging an error if not. + static void VerifyUTF8String(const char* data, int size, Operation op); -// This macro does the same thing as WireFormat::MakeTag(), but the -// result is usable as a compile-time constant, which makes it usable -// as a switch case or a template input. WireFormat::MakeTag() is more -// type-safe, though, so prefer it if possible. -#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE) \ - static_cast( \ - ((FIELD_NUMBER) << ::google::protobuf::internal::WireFormat::kTagTypeBits) | (TYPE)) + private: + // Verifies that a string field is valid UTF8, logging an error if not. 
+ static void VerifyUTF8StringFallback( + const char* data, + int size, + Operation op); -inline uint32 WireFormat::MakeTag(int field_number, WireType type) { - return GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type); -} -inline WireFormat::WireType WireFormat::GetTagWireType(uint32 tag) { - return static_cast(tag & kTagTypeMask); -} -inline int WireFormat::GetTagFieldNumber(uint32 tag) { - return static_cast(tag >> kTagTypeBits); -} + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat); +}; -inline uint32 WireFormat::EncodeFloat(float value) { - union {float f; uint32 i;}; - f = value; - return i; -} +// Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet. +class LIBPROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper { + public: + UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields) + : unknown_fields_(unknown_fields) {} + virtual ~UnknownFieldSetFieldSkipper() {} -inline float WireFormat::DecodeFloat(uint32 value) { - union {float f; uint32 i;}; - i = value; - return f; -} + // implements FieldSkipper ----------------------------------------- + virtual bool SkipField(io::CodedInputStream* input, uint32 tag); + virtual bool SkipMessage(io::CodedInputStream* input); + virtual void SkipUnknownEnum(int field_number, int value); -inline uint64 WireFormat::EncodeDouble(double value) { - union {double f; uint64 i;}; - f = value; - return i; -} + private: + UnknownFieldSet* unknown_fields_; +}; -inline double WireFormat::DecodeDouble(uint64 value) { - union {double f; uint64 i;}; - i = value; - return f; -} +// inline methods ==================================================== -// ZigZag Transform: Encodes signed integers so that they can be -// effectively used with varint encoding. -// -// varint operates on unsigned integers, encoding smaller numbers into -// fewer bytes. 
If you try to use it on a signed integer, it will treat -// this number as a very large unsigned integer, which means that even -// small signed numbers like -1 will take the maximum number of bytes -// (10) to encode. ZigZagEncode() maps signed integers to unsigned -// in such a way that those with a small absolute value will have smaller -// encoded values, making them appropriate for encoding using varint. -// -// int32 -> uint32 -// ------------------------- -// 0 -> 0 -// -1 -> 1 -// 1 -> 2 -// -2 -> 3 -// ... -> ... -// 2147483647 -> 4294967294 -// -2147483648 -> 4294967295 -// -// >> encode >> -// << decode << +inline WireFormatLite::WireType WireFormat::WireTypeForField( + const FieldDescriptor* field) { + if (field->options().packed()) { + return WireFormatLite::WIRETYPE_LENGTH_DELIMITED; + } else { + return WireTypeForFieldType(field->type()); + } +} -inline uint32 WireFormat::ZigZagEncode32(int32 n) { - // Note: the right-shift must be arithmetic - return (n << 1) ^ (n >> 31); +inline WireFormatLite::WireType WireFormat::WireTypeForFieldType( + FieldDescriptor::Type type) { + // Some compilers don't like enum -> enum casts, so we implicit_cast to + // int first. + return WireFormatLite::WireTypeForFieldType( + static_cast( + implicit_cast(type))); } -inline int32 WireFormat::ZigZagDecode32(uint32 n) { - return (n >> 1) ^ -static_cast(n & 1); +inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) { + return WireFormatLite::MakeTag(field->number(), WireTypeForField(field)); } -inline uint64 WireFormat::ZigZagEncode64(int64 n) { - // Note: the right-shift must be arithmetic - return (n << 1) ^ (n >> 63); +inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) { + // Some compilers don't like enum -> enum casts, so we implicit_cast to + // int first. 
+ return WireFormatLite::TagSize(field_number, + static_cast( + implicit_cast(type))); } -inline int64 WireFormat::ZigZagDecode64(uint64 n) { - return (n >> 1) ^ -static_cast(n & 1); +inline void WireFormat::VerifyUTF8String(const char* data, int size, + WireFormat::Operation op) { +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + WireFormat::VerifyUTF8StringFallback(data, size, op); +#endif } + } // namespace internal } // namespace protobuf -- cgit v1.2.3