From 40ee551715c3a784ea6132dbf604b0e665ca2def Mon Sep 17 00:00:00 2001 From: temporal Date: Thu, 10 Jul 2008 02:12:20 +0000 Subject: Initial checkin. --- src/google/protobuf/wire_format.h | 446 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 src/google/protobuf/wire_format.h (limited to 'src/google/protobuf/wire_format.h') diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h new file mode 100644 index 00000000..d59f3fc1 --- /dev/null +++ b/src/google/protobuf/wire_format.h @@ -0,0 +1,446 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// atenasio@google.com (Chris Atenasio) (ZigZag transform) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This header is logically internal, but is made public because it is used +// from protocol-compiler-generated code, which may reside in other components. + +#ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__ +#define GOOGLE_PROTOBUF_WIRE_FORMAT_H__ + +#include +#include // Message::Reflection +#include + +namespace google { + +namespace protobuf { + namespace io { + class CodedInputStream; // coded_stream.h + class CodedOutputStream; // coded_stream.h + } + class UnknownFieldSet; // unknown_field_set.h +} + +namespace protobuf { +namespace internal { + +// This class is for internal use by the protocol buffer library and by +// protocol-complier-generated message classes. It must not be called +// directly by clients. +// +// This class contains helpers for implementing the binary protocol buffer +// wire format. These helpers are called primarily by generated code. The +// class also contains reflection-based implementations of the wire format. +// +// This class is really a namespace that contains only static methods. +class LIBPROTOBUF_EXPORT WireFormat { + public: + // These procedures can be used to implement the methods of Message which + // handle parsing and serialization of the protocol buffer wire format + // using only the Message::Reflection interface. When you ask the protocol + // compiler to optimize for code size rather than speed, it will implement + // those methods in terms of these procedures. Of course, these are much + // slower than the specialized implementations which the protocol compiler + // generates when told to optimize for speed. + + // Read a message in protocol buffer wire format. + // + // This procedure reads either to the end of the input stream or through + // a WIRETYPE_END_GROUP tag ending the message, whichever comes first. + // It returns false if the input is invalid. + // + // Required fields are NOT checked by this method. You must call + // IsInitialized() on the resulting message yourself. + static bool ParseAndMergePartial(const Descriptor* descriptor, + io::CodedInputStream* input, + Message::Reflection* message_reflection); + + // Serialize a message in protocol buffer wire format. + // + // Any embedded messages within the message must have their correct sizes + // cached. However, the top-level message need not; its size is passed as + // a parameter to this procedure. + // + // These return false iff the underlying stream returns a write error. + static bool SerializeWithCachedSizes( + const Descriptor* descriptor, + const Message::Reflection* message_reflection, + int size, io::CodedOutputStream* output); + + // Implements Message::ByteSize() via reflection. WARNING: The result + // of this method is *not* cached anywhere. However, all embedded messages + // will have their ByteSize() methods called, so their sizes will be cached. + // Therefore, calling this method is sufficient to allow you to call + // WireFormat::SerializeWithCachedSizes() on the same object. + static int ByteSize(const Descriptor* descriptor, + const Message::Reflection* message_reflection); + + // ----------------------------------------------------------------- + // Helpers for dealing with unknown fields + + // Skips a field value of the given WireType. The input should start + // positioned immediately after the tag. If unknown_fields is non-NULL, + // the contents of the field will be added to it. + static bool SkipField(io::CodedInputStream* input, uint32 tag, + UnknownFieldSet* unknown_fields); + + // Reads and ignores a message from the input. If unknown_fields is non-NULL, + // the contents will be added to it. + static bool SkipMessage(io::CodedInputStream* input, + UnknownFieldSet* unknown_fields); + + // Write the contents of an UnknownFieldSet to the output. + static bool SerializeUnknownFields(const UnknownFieldSet& unknown_fields, + io::CodedOutputStream* output); + + // Same thing except for messages that have the message_set_wire_format + // option. + static bool SerializeUnknownMessageSetItems( + const UnknownFieldSet& unknown_fields, + io::CodedOutputStream* output); + + // Compute the size of the UnknownFieldSet on the wire. + static int ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields); + + // Same thing except for messages that have the message_set_wire_format + // option. + static int ComputeUnknownMessageSetItemsSize( + const UnknownFieldSet& unknown_fields); + + // ----------------------------------------------------------------- + // Helper constants and functions related to the format. These are + // mostly meant for internal and generated code to use. + + // The wire format is composed of a sequence of tag/value pairs, each + // of which contains the value of one field (or one element of a repeated + // field). Each tag is encoded as a varint. The lower bits of the tag + // identify its wire type, which specifies the format of the data to follow. + // The rest of the bits contain the field number. Each type of field (as + // declared by FieldDescriptor::Type, in descriptor.h) maps to one of + // these wire types. Immediately following each tag is the field's value, + // encoded in the format specified by the wire type. Because the tag + // identifies the encoding of this data, it is possible to skip + // unrecognized fields for forwards compatibility. + + enum WireType { + WIRETYPE_VARINT = 0, + WIRETYPE_FIXED64 = 1, + WIRETYPE_LENGTH_DELIMITED = 2, + WIRETYPE_START_GROUP = 3, + WIRETYPE_END_GROUP = 4, + WIRETYPE_FIXED32 = 5, + }; + + static inline WireType WireTypeForFieldType(FieldDescriptor::Type type) { + return kWireTypeForFieldType[type]; + } + + // Number of bits in a tag which identify the wire type. + static const int kTagTypeBits = 3; + // Mask for those bits. + static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1; + + // Helper functions for encoding and decoding tags. (Inlined below.) + static uint32 MakeTag(const FieldDescriptor* field); + static uint32 MakeTag(int field_number, WireType type); + static WireType GetTagWireType(uint32 tag); + static int GetTagFieldNumber(uint32 tag); + + // Helper functions for converting between floats/doubles and IEEE-754 + // uint32s/uint64s so that they can be written. (Assumes your platform + // uses IEEE-754 floats.) + static uint32 EncodeFloat(float value); + static float DecodeFloat(uint32 value); + static uint64 EncodeDouble(double value); + static double DecodeDouble(uint64 value); + + // Helper functions for mapping signed integers to unsigned integers in + // such a way that numbers with small magnitudes will encode to smaller + // varints. If you simply static_cast a negative number to an unsigned + // number and varint-encode it, it will always take 10 bytes, defeating + // the purpose of varint. So, for the "sint32" and "sint64" field types, + // we ZigZag-encode the values. + static uint32 ZigZagEncode32(int32 n); + static int32 ZigZagDecode32(uint32 n); + static uint64 ZigZagEncode64(int64 n); + static int64 ZigZagDecode64(uint64 n); + + // Parse a single field. The input should start out positioned immidately + // after the tag. + static bool ParseAndMergeField( + uint32 tag, + const FieldDescriptor* field, // May be NULL for unknown + Message::Reflection* message_reflection, + io::CodedInputStream* input); + + // Serialize a single field. + static bool SerializeFieldWithCachedSizes( + const FieldDescriptor* field, // Cannot be NULL + const Message::Reflection* message_reflection, + io::CodedOutputStream* output); + + // Compute size of a single field. If the field is a message type, this + // will call ByteSize() for the embedded message, insuring that it caches + // its size. + static int FieldByteSize( + const FieldDescriptor* field, // Cannot be NULL + const Message::Reflection* message_reflection); + + // ================================================================= + // Methods for reading/writing individual field. The implementations + // of these methods are defined in wire_format_inl.h; you must #include + // that file to use these. + +// Avoid ugly line wrapping +#define input io::CodedInputStream* input +#define output io::CodedOutputStream* output +#define field_number int field_number +#define INL GOOGLE_ATTRIBUTE_ALWAYS_INLINE + + // Read fields, not including tags. The assumption is that you already + // read the tag to determine what field to read. + static inline bool ReadInt32 (input, int32* value); + static inline bool ReadInt64 (input, int64* value); + static inline bool ReadUInt32 (input, uint32* value); + static inline bool ReadUInt64 (input, uint64* value); + static inline bool ReadSInt32 (input, int32* value); + static inline bool ReadSInt64 (input, int64* value); + static inline bool ReadFixed32 (input, uint32* value); + static inline bool ReadFixed64 (input, uint64* value); + static inline bool ReadSFixed32(input, int32* value); + static inline bool ReadSFixed64(input, int64* value); + static inline bool ReadFloat (input, float* value); + static inline bool ReadDouble (input, double* value); + static inline bool ReadBool (input, bool* value); + static inline bool ReadEnum (input, int* value); + + static inline bool ReadString(input, string* value); + static inline bool ReadBytes (input, string* value); + + static inline bool ReadGroup (field_number, input, Message* value); + static inline bool ReadMessage(input, Message* value); + + // Like above, but de-virtualize the call to MergePartialFromCodedStream(). + // The pointer must point at an instance of MessageType, *not* a subclass (or + // the subclass must not override MergePartialFromCodedStream()). + template + static inline bool ReadGroupNoVirtual(field_number, input, + MessageType* value); + template + static inline bool ReadMessageNoVirtual(input, MessageType* value); + + // Write a tag. The Write*() functions automatically include the tag, so + // normally there's no need to call this. + static inline bool WriteTag(field_number, WireType type, output) INL; + + // Write fields, including tags. + static inline bool WriteInt32 (field_number, int32 value, output) INL; + static inline bool WriteInt64 (field_number, int64 value, output) INL; + static inline bool WriteUInt32 (field_number, uint32 value, output) INL; + static inline bool WriteUInt64 (field_number, uint64 value, output) INL; + static inline bool WriteSInt32 (field_number, int32 value, output) INL; + static inline bool WriteSInt64 (field_number, int64 value, output) INL; + static inline bool WriteFixed32 (field_number, uint32 value, output) INL; + static inline bool WriteFixed64 (field_number, uint64 value, output) INL; + static inline bool WriteSFixed32(field_number, int32 value, output) INL; + static inline bool WriteSFixed64(field_number, int64 value, output) INL; + static inline bool WriteFloat (field_number, float value, output) INL; + static inline bool WriteDouble (field_number, double value, output) INL; + static inline bool WriteBool (field_number, bool value, output) INL; + static inline bool WriteEnum (field_number, int value, output) INL; + + static inline bool WriteString(field_number, const string& value, output) INL; + static inline bool WriteBytes (field_number, const string& value, output) INL; + + static inline bool WriteGroup(field_number, const Message& value, output) INL; + static inline bool WriteMessage( + field_number, const Message& value, output) INL; + + // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The + // pointer must point at an instance of MessageType, *not* a subclass (or + // the subclass must not override SerializeWithCachedSizes()). + template + static inline bool WriteGroupNoVirtual( + field_number, const MessageType& value, output) INL; + template + static inline bool WriteMessageNoVirtual( + field_number, const MessageType& value, output) INL; + + // Compute the byte size of a tag. For groups, this includes both the start + // and end tags. + static inline int TagSize(field_number, FieldDescriptor::Type type); + + // Compute the byte size of a field. The XxSize() functions do NOT include + // the tag, so you must also call TagSize(). (This is because, for repeated + // fields, you should only call TagSize() once and multiply it by the element + // count, but you may have to call XxSize() for each individual element.) + static inline int Int32Size ( int32 value); + static inline int Int64Size ( int64 value); + static inline int UInt32Size (uint32 value); + static inline int UInt64Size (uint64 value); + static inline int SInt32Size ( int32 value); + static inline int SInt64Size ( int64 value); + static inline int EnumSize ( int value); + + // These types always have the same size. + static const int kFixed32Size = 4; + static const int kFixed64Size = 8; + static const int kSFixed32Size = 4; + static const int kSFixed64Size = 8; + static const int kFloatSize = 4; + static const int kDoubleSize = 8; + static const int kBoolSize = 1; + + static inline int StringSize(const string& value); + static inline int BytesSize (const string& value); + + static inline int GroupSize (const Message& value); + static inline int MessageSize(const Message& value); + + // Like above, but de-virtualize the call to ByteSize(). The + // pointer must point at an instance of MessageType, *not* a subclass (or + // the subclass must not override ByteSize()). + template + static inline int GroupSizeNoVirtual (const MessageType& value); + template + static inline int MessageSizeNoVirtual(const MessageType& value); + +#undef input +#undef output +#undef field_number +#undef INL + + private: + static const WireType kWireTypeForFieldType[]; + + // Parse/serialize a MessageSet::Item group. Used with messages that use + // opion message_set_wire_format = true. + static bool ParseAndMergeMessageSetItem( + io::CodedInputStream* input, + Message::Reflection* message_reflection); + static bool SerializeMessageSetItemWithCachedSizes( + const FieldDescriptor* field, + const Message::Reflection* message_reflection, + io::CodedOutputStream* output); + static int MessageSetItemByteSize( + const FieldDescriptor* field, + const Message::Reflection* message_reflection); + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat); +}; + +// inline methods ==================================================== + +// This macro does the same thing as WireFormat::MakeTag(), but the +// result is usable as a compile-time constant, which makes it usable +// as a switch case or a template input. WireFormat::MakeTag() is more +// type-safe, though, so prefer it if possible. +#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE) \ + static_cast( \ + ((FIELD_NUMBER) << ::google::protobuf::internal::WireFormat::kTagTypeBits) | (TYPE)) + +inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) { + return MakeTag(field->number(), WireTypeForFieldType(field->type())); +} + +inline uint32 WireFormat::MakeTag(int field_number, WireType type) { + return GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type); +} + +inline WireFormat::WireType WireFormat::GetTagWireType(uint32 tag) { + return static_cast(tag & kTagTypeMask); +} + +inline int WireFormat::GetTagFieldNumber(uint32 tag) { + return static_cast(tag >> kTagTypeBits); +} + +inline uint32 WireFormat::EncodeFloat(float value) { + union {float f; uint32 i;}; + f = value; + return i; +} + +inline float WireFormat::DecodeFloat(uint32 value) { + union {float f; uint32 i;}; + i = value; + return f; +} + +inline uint64 WireFormat::EncodeDouble(double value) { + union {double f; uint64 i;}; + f = value; + return i; +} + +inline double WireFormat::DecodeDouble(uint64 value) { + union {double f; uint64 i;}; + i = value; + return f; +} + +// ZigZag Transform: Encodes signed integers so that they can be +// effectively used with varint encoding. +// +// varint operates on unsigned integers, encoding smaller numbers into +// fewer bytes. If you try to use it on a signed integer, it will treat +// this number as a very large unsigned integer, which means that even +// small signed numbers like -1 will take the maximum number of bytes +// (10) to encode. ZigZagEncode() maps signed integers to unsigned +// in such a way that those with a small absolute value will have smaller +// encoded values, making them appropriate for encoding using varint. +// +// int32 -> uint32 +// ------------------------- +// 0 -> 0 +// -1 -> 1 +// 1 -> 2 +// -2 -> 3 +// ... -> ... +// 2147483647 -> 4294967294 +// -2147483648 -> 4294967295 +// +// >> encode >> +// << decode << + +inline uint32 WireFormat::ZigZagEncode32(int32 n) { + // Note: the right-shift must be arithmetic + return (n << 1) ^ (n >> 31); +} + +inline int32 WireFormat::ZigZagDecode32(uint32 n) { + return (n >> 1) ^ -static_cast(n & 1); +} + +inline uint64 WireFormat::ZigZagEncode64(int64 n) { + // Note: the right-shift must be arithmetic + return (n << 1) ^ (n >> 63); +} + +inline int64 WireFormat::ZigZagDecode64(uint64 n) { + return (n >> 1) ^ -static_cast(n & 1); +} + +} // namespace internal +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_WIRE_FORMAT_H__ -- cgit v1.2.3