Integrate changes from internal code.

protoc
* Enum values may now have custom options, using syntax similar to field options.
* Fixed bug where .proto files which use custom options but don't actually define them (i.e. they import another .proto file defining the options) had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in C.

C++
* Generated message classes now have a Swap() method which efficiently swaps the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate of the number of bytes of allocated memory currently owned by the object. This is particularly useful when you are reusing a single message object to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the serialized data. May be more convenient than calling SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() to not be O(n^2). Durr.
* Fixed crash in TextFormat parsing if the first token in the input caused a tokenization error.

Java
* New overload of mergeFrom() which parses a slice of a byte array instead of the whole thing.
* New method ByteString.asReadOnlyByteBuffer() does what it sounds like.
* Improved performance of isInitialized() when optimizing for code size.

Python
* Corrected ListFields() signature in Message base class to match what subclasses actually implement.
* Some minor refactoring.
author: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
committer: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
commit: 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
tree: d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_unittest.cc
parent: a2a32c20434807e9966e3f48375f9419134d1b55 (diff)
1 files changed, 149 insertions, 1 deletions
diff --git a/src/google/protobuf/wire_format_unittest.cc b/src/google/protobuf/wire_format_unittest.cc
index 6e3d4745..43dccd1a 100644
--- a/src/google/protobuf/wire_format_unittest.cc
+++ b/src/google/protobuf/wire_format_unittest.cc
@@ -199,6 +199,30 @@ TEST(WireFormatTest, SerializeFieldsAndExtensions) {
   TestUtil::ExpectAllFieldsAndExtensionsInOrder(generated_data);
 }
 
+TEST(WireFormatTest, ParseMultipleExtensionRanges) {
+  // Make sure we can parse a message that contains multiple extension ranges.
+  unittest::TestFieldOrderings source;
+  string data;
+
+  TestUtil::SetAllFieldsAndExtensions(&source);
+  source.SerializeToString(&data);
+
+  {
+    unittest::TestFieldOrderings dest;
+    EXPECT_TRUE(dest.ParseFromString(data));
+    EXPECT_EQ(source.DebugString(), dest.DebugString());
+  }
+
+  // Also test using reflection-based parsing.
+  {
+    unittest::TestFieldOrderings dest;
+    io::ArrayInputStream raw_input(data.data(), data.size());
+    io::CodedInputStream coded_input(&raw_input);
+    EXPECT_TRUE(WireFormat::ParseAndMergePartial(&coded_input, &dest));
+    EXPECT_EQ(source.DebugString(), dest.DebugString());
+  }
+}
+
 const int kUnknownTypeId = 1550055;
 
 TEST(WireFormatTest, SerializeMessageSet) {
@@ -421,7 +445,7 @@ class WireFormatInvalidInputTest : public testing::Test {
       io::StringOutputStream raw_output(&result);
       io::CodedOutputStream output(&raw_output);
 
-      EXPECT_TRUE(WireFormat::WriteString(
+      EXPECT_TRUE(WireFormat::WriteBytes(
         field->number(), string(bytes, size), &output));
     }
 
@@ -541,6 +565,130 @@ TEST_F(WireFormatInvalidInputTest, InvalidStringInUnknownGroup) {
   EXPECT_FALSE(WireFormat::SkipMessage(&coded_input, &unknown_fields));
 }
 
+// Test differences between string and bytes.
+// The value of a string field must be a valid UTF-8 string.  When UTF-8
+// validation is enabled (GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED):
+// WriteInvalidUTF8String:  one error message is logged.
+// ReadInvalidUTF8String:  one error message is logged.
+// WriteValidUTF8String: fine.
+// ReadValidUTF8String:  fine.
+// WriteArbitraryBytes: fine.
+// ReadArbitraryBytes: fine.
+const char * kInvalidUTF8String = "Invalid UTF-8: \xA0\xB0\xC0\xD0";
+const char * kValidUTF8String = "Valid UTF-8: \x01\x02\u8C37\u6B4C";
+
+template<typename T>
+bool WriteMessage(const char *value, T *message, string *wire_buffer) {
+  message->set_data(value);
+  wire_buffer->clear();
+  message->AppendToString(wire_buffer);
+  return (wire_buffer->size() > 0);
+}
+
+template<typename T>
+bool ReadMessage(const string &wire_buffer, T *message) {
+  return message->ParseFromArray(wire_buffer.data(), wire_buffer.size());
+}
+
+TEST(Utf8ValidationTest, WriteInvalidUTF8String) {
+  string wire_buffer;
+  protobuf_unittest::OneString input;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    WriteMessage(kInvalidUTF8String, &input, &wire_buffer);
+    errors = log.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  ASSERT_EQ(1, errors.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "serializing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            errors[0]);
+
+#else
+  ASSERT_EQ(0, errors.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+TEST(Utf8ValidationTest, ReadInvalidUTF8String) {
+  string wire_buffer;
+  protobuf_unittest::OneString input;
+  WriteMessage(kInvalidUTF8String, &input, &wire_buffer);
+  protobuf_unittest::OneString output;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    ReadMessage(wire_buffer, &output);
+    errors = log.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  ASSERT_EQ(1, errors.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "parsing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            errors[0]);
+
+#else
+  ASSERT_EQ(0, errors.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+TEST(Utf8ValidationTest, WriteValidUTF8String) {
+  string wire_buffer;
+  protobuf_unittest::OneString input;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    WriteMessage(kValidUTF8String, &input, &wire_buffer);
+    errors = log.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, errors.size());
+}
+
+TEST(Utf8ValidationTest, ReadValidUTF8String) {
+  string wire_buffer;
+  protobuf_unittest::OneString input;
+  WriteMessage(kValidUTF8String, &input, &wire_buffer);
+  protobuf_unittest::OneString output;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    ReadMessage(wire_buffer, &output);
+    errors = log.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, errors.size());
+  EXPECT_EQ(input.data(), output.data());
+}
+
+// Bytes: anything can pass as bytes, so the invalid UTF-8 string is used to test.
+TEST(Utf8ValidationTest, WriteArbitraryBytes) {
+  string wire_buffer;
+  protobuf_unittest::OneBytes input;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    WriteMessage(kInvalidUTF8String, &input, &wire_buffer);
+    errors = log.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, errors.size());
+}
+
+TEST(Utf8ValidationTest, ReadArbitraryBytes) {
+  string wire_buffer;
+  protobuf_unittest::OneBytes input;
+  WriteMessage(kInvalidUTF8String, &input, &wire_buffer);
+  protobuf_unittest::OneBytes output;
+  vector<string> errors;
+  {
+    ScopedMemoryLog log;
+    ReadMessage(wire_buffer, &output);
+    errors = log.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, errors.size());
+  EXPECT_EQ(input.data(), output.data());
+}
+
 }  // namespace
 }  // namespace internal
 }  // namespace protobuf
author	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2008-11-21 00:06:27 +0000
committer	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2008-11-21 00:06:27 +0000
commit	26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
tree	d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_unittest.cc
parent	a2a32c20434807e9966e3f48375f9419134d1b55 (diff)