Submit recent changes from internal branch, including "lite mode" for C++ and Java.

See CHANGES.txt for more details.
author: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-07-29 01:13:20 +0000
committer: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2009-07-29 01:13:20 +0000
commit: 80b1d62bfcea65c59e2160da71dad84b1bd19cef (patch)
tree: 5423b830c53174fec83a7ea01ff0877e11c1ddb6 /src/google/protobuf/wire_format.h
parent: d2fd0638c309113ccae3731a58e30419f522269a (diff)
1 files changed, 83 insertions, 353 deletions
diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h
index 963f427a..c7539250 100644
--- a/src/google/protobuf/wire_format.h
+++ b/src/google/protobuf/wire_format.h
@@ -40,17 +40,23 @@
 #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
 
 #include <string>
-#include <google/protobuf/message.h>
+#include <google/protobuf/descriptor.pb.h>
 #include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
+#include <google/protobuf/wire_format_lite.h>
 
-namespace google {
+// Do UTF-8 validation on string type in Debug build only
+#ifndef NDEBUG
+#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+#endif
 
+namespace google {
 namespace protobuf {
   namespace io {
     class CodedInputStream;      // coded_stream.h
     class CodedOutputStream;     // coded_stream.h
   }
-  class UnknownFieldSet;       // unknown_field_set.h
+  class UnknownFieldSet;         // unknown_field_set.h
 }
 
 namespace protobuf {
@@ -60,13 +66,26 @@ namespace internal {
 // protocol-complier-generated message classes.  It must not be called
 // directly by clients.
 //
-// This class contains helpers for implementing the binary protocol buffer
-// wire format.  These helpers are called primarily by generated code.  The
-// class also contains reflection-based implementations of the wire format.
+// This class contains code for implementing the binary protocol buffer
+// wire format via reflection.  The WireFormatLite class implements the
+// non-reflection based routines.
 //
-// This class is really a namespace that contains only static methods.
+// This class is really a namespace that contains only static methods.
 class LIBPROTOBUF_EXPORT WireFormat {
  public:
+
+  // Given a field return its WireType
+  static inline WireFormatLite::WireType WireTypeForField(
+      const FieldDescriptor* field);
+
+  // Given a FieldDescriptor::Type, return its WireType.
+  static inline WireFormatLite::WireType WireTypeForFieldType(
+      FieldDescriptor::Type type);
+
+  // Compute the byte size of a tag.  For groups, this includes both the start
+  // and end tags.
+  static inline int TagSize(int field_number, FieldDescriptor::Type type);
+
   // These procedures can be used to implement the methods of Message which
   // handle parsing and serialization of the protocol buffer wire format
   // using only the Reflection interface.  When you ask the protocol
@@ -152,41 +171,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
   static int ComputeUnknownMessageSetItemsSize(
       const UnknownFieldSet& unknown_fields);
 
-  // -----------------------------------------------------------------
-  // Helper constants and functions related to the format.  These are
-  // mostly meant for internal and generated code to use.
-
-  // The wire format is composed of a sequence of tag/value pairs, each
-  // of which contains the value of one field (or one element of a repeated
-  // field).  Each tag is encoded as a varint.  The lower bits of the tag
-  // identify its wire type, which specifies the format of the data to follow.
-  // The rest of the bits contain the field number.  Each type of field (as
-  // declared by FieldDescriptor::Type, in descriptor.h) maps to one of
-  // these wire types.  Immediately following each tag is the field's value,
-  // encoded in the format specified by the wire type.  Because the tag
-  // identifies the encoding of this data, it is possible to skip
-  // unrecognized fields for forwards compatibility.
-
-  enum WireType {
-    WIRETYPE_VARINT           = 0,
-    WIRETYPE_FIXED64          = 1,
-    WIRETYPE_LENGTH_DELIMITED = 2,
-    WIRETYPE_START_GROUP      = 3,
-    WIRETYPE_END_GROUP        = 4,
-    WIRETYPE_FIXED32          = 5,
-  };
-
-  static inline WireType WireTypeForFieldType(FieldDescriptor::Type type) {
-    return kWireTypeForFieldType[type];
-  }
-  // This is different from WireTypeForFieldType(field->type()) in the case of
-  // packed repeated fields.
-  static inline WireType WireTypeForField(const FieldDescriptor* field);
-
-  // Number of bits in a tag which identify the wire type.
-  static const int kTagTypeBits = 3;
-  // Mask for those bits.
-  static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1;
 
   // Helper functions for encoding and decoding tags.  (Inlined below and in
   // _inl.h)
@@ -194,28 +178,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
   // This is different from MakeTag(field->number(), field->type()) in the case
   // of packed repeated fields.
   static uint32 MakeTag(const FieldDescriptor* field);
-  static uint32 MakeTag(int field_number, WireType type);
-  static WireType GetTagWireType(uint32 tag);
-  static int GetTagFieldNumber(uint32 tag);
-
-  // Helper functions for converting between floats/doubles and IEEE-754
-  // uint32s/uint64s so that they can be written.  (Assumes your platform
-  // uses IEEE-754 floats.)
-  static uint32 EncodeFloat(float value);
-  static float DecodeFloat(uint32 value);
-  static uint64 EncodeDouble(double value);
-  static double DecodeDouble(uint64 value);
-
-  // Helper functions for mapping signed integers to unsigned integers in
-  // such a way that numbers with small magnitudes will encode to smaller
-  // varints.  If you simply static_cast a negative number to an unsigned
-  // number and varint-encode it, it will always take 10 bytes, defeating
-  // the purpose of varint.  So, for the "sint32" and "sint64" field types,
-  // we ZigZag-encode the values.
-  static uint32 ZigZagEncode32(int32 n);
-  static int32  ZigZagDecode32(uint32 n);
-  static uint64 ZigZagEncode64(int64 n);
-  static int64  ZigZagDecode64(uint64 n);
 
   // Parse a single field.  The input should start out positioned immidately
   // after the tag.
@@ -238,223 +200,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
       const FieldDescriptor* field,        // Cannot be NULL
       const Message& message);
 
-  // =================================================================
-  // Methods for reading/writing individual field.  The implementations
-  // of these methods are defined in wire_format_inl.h; you must #include
-  // that file to use these.
-
-// Avoid ugly line wrapping
-#define input  io::CodedInputStream*  input
-#define output io::CodedOutputStream* output
-#define field_number int field_number
-#define INL GOOGLE_ATTRIBUTE_ALWAYS_INLINE
-
-  // Read fields, not including tags.  The assumption is that you already
-  // read the tag to determine what field to read.
-  static inline bool ReadInt32   (input,  int32* value);
-  static inline bool ReadInt64   (input,  int64* value);
-  static inline bool ReadUInt32  (input, uint32* value);
-  static inline bool ReadUInt64  (input, uint64* value);
-  static inline bool ReadSInt32  (input,  int32* value);
-  static inline bool ReadSInt64  (input,  int64* value);
-  static inline bool ReadFixed32 (input, uint32* value);
-  static inline bool ReadFixed64 (input, uint64* value);
-  static inline bool ReadSFixed32(input,  int32* value);
-  static inline bool ReadSFixed64(input,  int64* value);
-  static inline bool ReadFloat   (input,  float* value);
-  static inline bool ReadDouble  (input, double* value);
-  static inline bool ReadBool    (input,   bool* value);
-  static inline bool ReadEnum    (input,    int* value);
-
-  static inline bool ReadString(input, string* value);
-  static inline bool ReadBytes (input, string* value);
-
-  static inline bool ReadGroup  (field_number, input, Message* value);
-  static inline bool ReadMessage(input, Message* value);
-
-  // Like above, but de-virtualize the call to MergePartialFromCodedStream().
-  // The pointer must point at an instance of MessageType, *not* a subclass (or
-  // the subclass must not override MergePartialFromCodedStream()).
-  template<typename MessageType>
-  static inline bool ReadGroupNoVirtual(field_number, input,
-                                        MessageType* value);
-  template<typename MessageType>
-  static inline bool ReadMessageNoVirtual(input, MessageType* value);
-
-  // Write a tag.  The Write*() functions typically include the tag, so
-  // normally there's no need to call this unless using the Write*NoTag()
-  // variants.
-  static inline void WriteTag(field_number, WireType type, output) INL;
-
-  // Write fields, without tags.
-  static inline void WriteInt32NoTag   (int32 value, output) INL;
-  static inline void WriteInt64NoTag   (int64 value, output) INL;
-  static inline void WriteUInt32NoTag  (uint32 value, output) INL;
-  static inline void WriteUInt64NoTag  (uint64 value, output) INL;
-  static inline void WriteSInt32NoTag  (int32 value, output) INL;
-  static inline void WriteSInt64NoTag  (int64 value, output) INL;
-  static inline void WriteFixed32NoTag (uint32 value, output) INL;
-  static inline void WriteFixed64NoTag (uint64 value, output) INL;
-  static inline void WriteSFixed32NoTag(int32 value, output) INL;
-  static inline void WriteSFixed64NoTag(int64 value, output) INL;
-  static inline void WriteFloatNoTag   (float value, output) INL;
-  static inline void WriteDoubleNoTag  (double value, output) INL;
-  static inline void WriteBoolNoTag    (bool value, output) INL;
-  static inline void WriteEnumNoTag    (int value, output) INL;
-
-  // Write fields, including tags.
-  static inline void WriteInt32   (field_number,  int32 value, output) INL;
-  static inline void WriteInt64   (field_number,  int64 value, output) INL;
-  static inline void WriteUInt32  (field_number, uint32 value, output) INL;
-  static inline void WriteUInt64  (field_number, uint64 value, output) INL;
-  static inline void WriteSInt32  (field_number,  int32 value, output) INL;
-  static inline void WriteSInt64  (field_number,  int64 value, output) INL;
-  static inline void WriteFixed32 (field_number, uint32 value, output) INL;
-  static inline void WriteFixed64 (field_number, uint64 value, output) INL;
-  static inline void WriteSFixed32(field_number,  int32 value, output) INL;
-  static inline void WriteSFixed64(field_number,  int64 value, output) INL;
-  static inline void WriteFloat   (field_number,  float value, output) INL;
-  static inline void WriteDouble  (field_number, double value, output) INL;
-  static inline void WriteBool    (field_number,   bool value, output) INL;
-  static inline void WriteEnum    (field_number,    int value, output) INL;
-
-  static inline void WriteString(field_number, const string& value, output) INL;
-  static inline void WriteBytes (field_number, const string& value, output) INL;
-
-  static inline void WriteGroup(field_number, const Message& value, output) INL;
-  static inline void WriteMessage(
-    field_number, const Message& value, output) INL;
-
-  // Like above, but de-virtualize the call to SerializeWithCachedSizes().  The
-  // pointer must point at an instance of MessageType, *not* a subclass (or
-  // the subclass must not override SerializeWithCachedSizes()).
-  template<typename MessageType>
-  static inline void WriteGroupNoVirtual(
-    field_number, const MessageType& value, output) INL;
-  template<typename MessageType>
-  static inline void WriteMessageNoVirtual(
-    field_number, const MessageType& value, output) INL;
-
-#undef output
-#define output uint8* target
-
-  // Like above, but use only *ToArray methods of CodedOutputStream.
-  static inline uint8* WriteTagToArray(field_number, WireType type, output) INL;
-
-  // Write fields, without tags.
-  static inline uint8* WriteInt32NoTagToArray   (int32 value, output) INL;
-  static inline uint8* WriteInt64NoTagToArray   (int64 value, output) INL;
-  static inline uint8* WriteUInt32NoTagToArray  (uint32 value, output) INL;
-  static inline uint8* WriteUInt64NoTagToArray  (uint64 value, output) INL;
-  static inline uint8* WriteSInt32NoTagToArray  (int32 value, output) INL;
-  static inline uint8* WriteSInt64NoTagToArray  (int64 value, output) INL;
-  static inline uint8* WriteFixed32NoTagToArray (uint32 value, output) INL;
-  static inline uint8* WriteFixed64NoTagToArray (uint64 value, output) INL;
-  static inline uint8* WriteSFixed32NoTagToArray(int32 value, output) INL;
-  static inline uint8* WriteSFixed64NoTagToArray(int64 value, output) INL;
-  static inline uint8* WriteFloatNoTagToArray   (float value, output) INL;
-  static inline uint8* WriteDoubleNoTagToArray  (double value, output) INL;
-  static inline uint8* WriteBoolNoTagToArray    (bool value, output) INL;
-  static inline uint8* WriteEnumNoTagToArray    (int value, output) INL;
-
-  // Write fields, including tags.
-  static inline uint8* WriteInt32ToArray(
-    field_number, int32 value, output) INL;
-  static inline uint8* WriteInt64ToArray(
-    field_number, int64 value, output) INL;
-  static inline uint8* WriteUInt32ToArray(
-    field_number, uint32 value, output) INL;
-  static inline uint8* WriteUInt64ToArray(
-    field_number, uint64 value, output) INL;
-  static inline uint8* WriteSInt32ToArray(
-    field_number, int32 value, output) INL;
-  static inline uint8* WriteSInt64ToArray(
-    field_number, int64 value, output) INL;
-  static inline uint8* WriteFixed32ToArray(
-    field_number, uint32 value, output) INL;
-  static inline uint8* WriteFixed64ToArray(
-    field_number, uint64 value, output) INL;
-  static inline uint8* WriteSFixed32ToArray(
-    field_number, int32 value, output) INL;
-  static inline uint8* WriteSFixed64ToArray(
-    field_number, int64 value, output) INL;
-  static inline uint8* WriteFloatToArray(
-    field_number, float value, output) INL;
-  static inline uint8* WriteDoubleToArray(
-    field_number, double value, output) INL;
-  static inline uint8* WriteBoolToArray(
-    field_number, bool value, output) INL;
-  static inline uint8* WriteEnumToArray(
-    field_number, int value, output) INL;
-
-  static inline uint8* WriteStringToArray(
-    field_number, const string& value, output) INL;
-  static inline uint8* WriteBytesToArray(
-    field_number, const string& value, output) INL;
-
-  static inline uint8* WriteGroupToArray(
-      field_number, const Message& value, output) INL;
-  static inline uint8* WriteMessageToArray(
-      field_number, const Message& value, output) INL;
-
-  // Like above, but de-virtualize the call to SerializeWithCachedSizes().  The
-  // pointer must point at an instance of MessageType, *not* a subclass (or
-  // the subclass must not override SerializeWithCachedSizes()).
-  template<typename MessageType>
-  static inline uint8* WriteGroupNoVirtualToArray(
-    field_number, const MessageType& value, output) INL;
-  template<typename MessageType>
-  static inline uint8* WriteMessageNoVirtualToArray(
-    field_number, const MessageType& value, output) INL;
-
-#undef output
-#undef input
-#undef INL
-
-  // Compute the byte size of a tag.  For groups, this includes both the start
-  // and end tags.
-  static inline int TagSize(field_number, FieldDescriptor::Type type);
-
-#undef field_number
-
-  // Compute the byte size of a field.  The XxSize() functions do NOT include
-  // the tag, so you must also call TagSize().  (This is because, for repeated
-  // fields, you should only call TagSize() once and multiply it by the element
-  // count, but you may have to call XxSize() for each individual element.)
-  static inline int Int32Size   ( int32 value);
-  static inline int Int64Size   ( int64 value);
-  static inline int UInt32Size  (uint32 value);
-  static inline int UInt64Size  (uint64 value);
-  static inline int SInt32Size  ( int32 value);
-  static inline int SInt64Size  ( int64 value);
-  static inline int EnumSize    (   int value);
-
-  // These types always have the same size.
-  static const int kFixed32Size  = 4;
-  static const int kFixed64Size  = 8;
-  static const int kSFixed32Size = 4;
-  static const int kSFixed64Size = 8;
-  static const int kFloatSize    = 4;
-  static const int kDoubleSize   = 8;
-  static const int kBoolSize     = 1;
-
-  static inline int StringSize(const string& value);
-  static inline int BytesSize (const string& value);
-
-  static inline int GroupSize  (const Message& value);
-  static inline int MessageSize(const Message& value);
-
-  // Like above, but de-virtualize the call to ByteSize().  The
-  // pointer must point at an instance of MessageType, *not* a subclass (or
-  // the subclass must not override ByteSize()).
-  template<typename MessageType>
-  static inline int GroupSizeNoVirtual  (const MessageType& value);
-  template<typename MessageType>
-  static inline int MessageSizeNoVirtual(const MessageType& value);
-
- private:
-  static const WireType kWireTypeForFieldType[];
-
   // Parse/serialize a MessageSet::Item group.  Used with messages that use
   // opion message_set_wire_format = true.
   static bool ParseAndMergeMessageSetItem(
@@ -476,97 +221,82 @@ class LIBPROTOBUF_EXPORT WireFormat {
       const FieldDescriptor* field,        // Cannot be NULL
       const Message& message);
 
-  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
-};
+  enum Operation {
+    PARSE,
+    SERIALIZE,
+  };
 
-// inline methods ====================================================
+  // Verifies that a string field is valid UTF8, logging an error if not.
+  static void VerifyUTF8String(const char* data, int size, Operation op);
 
-// This macro does the same thing as WireFormat::MakeTag(), but the
-// result is usable as a compile-time constant, which makes it usable
-// as a switch case or a template input.  WireFormat::MakeTag() is more
-// type-safe, though, so prefer it if possible.
-#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE)             \
-  static_cast<uint32>(                                              \
-    ((FIELD_NUMBER) << ::google::protobuf::internal::WireFormat::kTagTypeBits) | (TYPE))
+ private:
+  // Verifies that a string field is valid UTF8, logging an error if not.
+  static void VerifyUTF8StringFallback(
+      const char* data,
+      int size,
+      Operation op);
 
-inline uint32 WireFormat::MakeTag(int field_number, WireType type) {
-  return GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type);
-}
 
-inline WireFormat::WireType WireFormat::GetTagWireType(uint32 tag) {
-  return static_cast<WireType>(tag & kTagTypeMask);
-}
 
-inline int WireFormat::GetTagFieldNumber(uint32 tag) {
-  return static_cast<int>(tag >> kTagTypeBits);
-}
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
+};
 
-inline uint32 WireFormat::EncodeFloat(float value) {
-  union {float f; uint32 i;};
-  f = value;
-  return i;
-}
+// Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
+class LIBPROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
+ public:
+  UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
+      : unknown_fields_(unknown_fields) {}
+  virtual ~UnknownFieldSetFieldSkipper() {}
 
-inline float WireFormat::DecodeFloat(uint32 value) {
-  union {float f; uint32 i;};
-  i = value;
-  return f;
-}
+  // implements FieldSkipper -----------------------------------------
+  virtual bool SkipField(io::CodedInputStream* input, uint32 tag);
+  virtual bool SkipMessage(io::CodedInputStream* input);
+  virtual void SkipUnknownEnum(int field_number, int value);
 
-inline uint64 WireFormat::EncodeDouble(double value) {
-  union {double f; uint64 i;};
-  f = value;
-  return i;
-}
+ private:
+  UnknownFieldSet* unknown_fields_;
+};
 
-inline double WireFormat::DecodeDouble(uint64 value) {
-  union {double f; uint64 i;};
-  i = value;
-  return f;
-}
+// inline methods ====================================================
 
-// ZigZag Transform:  Encodes signed integers so that they can be
-// effectively used with varint encoding.
-//
-// varint operates on unsigned integers, encoding smaller numbers into
-// fewer bytes.  If you try to use it on a signed integer, it will treat
-// this number as a very large unsigned integer, which means that even
-// small signed numbers like -1 will take the maximum number of bytes
-// (10) to encode.  ZigZagEncode() maps signed integers to unsigned
-// in such a way that those with a small absolute value will have smaller
-// encoded values, making them appropriate for encoding using varint.
-//
-//       int32 ->     uint32
-// -------------------------
-//           0 ->          0
-//          -1 ->          1
-//           1 ->          2
-//          -2 ->          3
-//         ... ->        ...
-//  2147483647 -> 4294967294
-// -2147483648 -> 4294967295
-//
-//        >> encode >>
-//        << decode <<
+inline WireFormatLite::WireType WireFormat::WireTypeForField(
+    const FieldDescriptor* field) {
+  // A field whose options mark it as packed is reported as length-delimited;
+  // any other field gets the wire type derived from its declared type.
+  const bool is_packed = field->options().packed();
+  return is_packed ? WireFormatLite::WIRETYPE_LENGTH_DELIMITED
+                   : WireTypeForFieldType(field->type());
+}
 
-inline uint32 WireFormat::ZigZagEncode32(int32 n) {
-  // Note:  the right-shift must be arithmetic
-  return (n << 1) ^ (n >> 31);
+inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
+    FieldDescriptor::Type type) {
+  // Go through int first: some compilers don't like enum -> enum casts.
+  const int type_as_int = implicit_cast<int>(type);
+  const WireFormatLite::FieldType lite_type =
+      static_cast<WireFormatLite::FieldType>(type_as_int);
+  return WireFormatLite::WireTypeForFieldType(lite_type);
 }
 
-inline int32 WireFormat::ZigZagDecode32(uint32 n) {
-  return (n >> 1) ^ -static_cast<int32>(n & 1);
+inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) {
+  return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
 }
 
-inline uint64 WireFormat::ZigZagEncode64(int64 n) {
-  // Note:  the right-shift must be arithmetic
-  return (n << 1) ^ (n >> 63);
+inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
+  // Go through int first: some compilers don't like enum -> enum casts.
+  const int type_as_int = implicit_cast<int>(type);
+  const WireFormatLite::FieldType lite_type =
+      static_cast<WireFormatLite::FieldType>(type_as_int);
+  return WireFormatLite::TagSize(field_number, lite_type);
 }
 
-inline int64 WireFormat::ZigZagDecode64(uint64 n) {
-  return (n >> 1) ^ -static_cast<int64>(n & 1);
+inline void WireFormat::VerifyUTF8String(const char* data, int size,
+    WireFormat::Operation op) {
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  WireFormat::VerifyUTF8StringFallback(data, size, op);
+#endif
 }
 
+
 }  // namespace internal
 }  // namespace protobuf
author	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2009-07-29 01:13:20 +0000
committer	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2009-07-29 01:13:20 +0000
commit	80b1d62bfcea65c59e2160da71dad84b1bd19cef (patch)
tree	5423b830c53174fec83a7ea01ff0877e11c1ddb6 /src/google/protobuf/wire_format.h
parent	d2fd0638c309113ccae3731a58e30419f522269a (diff)