From 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 Mon Sep 17 00:00:00 2001 From: "kenton@google.com" Date: Fri, 21 Nov 2008 00:06:27 +0000 Subject: Integrate changes from internal code. protoc * Enum values may now have custom options, using syntax similar to field options. * Fixed bug where .proto files which use custom options but don't actually define them (i.e. they import another .proto file defining the options) had to explicitly import descriptor.proto. * Adjacent string literals in .proto files will now be concatenated, like in C. C++ * Generated message classes now have a Swap() method which efficiently swaps the contents of two objects. * All message classes now have a SpaceUsed() method which returns an estimate of the number of bytes of allocated memory currently owned by the object. This is particularly useful when you are reusing a single message object to improve performance but want to make sure it doesn't bloat up too large. * New method Message::SerializeAsString() returns a string containing the serialized data. May be more convenient than calling SerializeToString(string*). * In debug mode, log error messages when string-type fields are found to contain bytes that are not valid UTF-8. * Fixed bug where a message with multiple extension ranges couldn't parse extensions. * Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on a message that contained no fields (but possibly contained extensions). * Fixed ShortDebugString() to not be O(n^2). Durr. * Fixed crash in TextFormat parsing if the first token in the input caused a tokenization error. Java * New overload of mergeFrom() which parses a slice of a byte array instead of the whole thing. * New method ByteString.asReadOnlyByteBuffer() does what it sounds like. * Improved performance of isInitialized() when optimizing for code size. Python * Corrected ListFields() signature in Message base class to match what subclasses actually implement. * Some minor refactoring. 
--- src/google/protobuf/wire_format_unittest.cc | 150 +++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) (limited to 'src/google/protobuf/wire_format_unittest.cc') diff --git a/src/google/protobuf/wire_format_unittest.cc b/src/google/protobuf/wire_format_unittest.cc index 6e3d4745..43dccd1a 100644 --- a/src/google/protobuf/wire_format_unittest.cc +++ b/src/google/protobuf/wire_format_unittest.cc @@ -199,6 +199,30 @@ TEST(WireFormatTest, SerializeFieldsAndExtensions) { TestUtil::ExpectAllFieldsAndExtensionsInOrder(generated_data); } +TEST(WireFormatTest, ParseMultipleExtensionRanges) { + // Make sure we can parse a message that contains multiple extensions ranges. + unittest::TestFieldOrderings source; + string data; + + TestUtil::SetAllFieldsAndExtensions(&source); + source.SerializeToString(&data); + + { + unittest::TestFieldOrderings dest; + EXPECT_TRUE(dest.ParseFromString(data)); + EXPECT_EQ(source.DebugString(), dest.DebugString()); + } + + // Also test using reflection-based parsing. + { + unittest::TestFieldOrderings dest; + io::ArrayInputStream raw_input(data.data(), data.size()); + io::CodedInputStream coded_input(&raw_input); + EXPECT_TRUE(WireFormat::ParseAndMergePartial(&coded_input, &dest)); + EXPECT_EQ(source.DebugString(), dest.DebugString()); + } +} + const int kUnknownTypeId = 1550055; TEST(WireFormatTest, SerializeMessageSet) { @@ -421,7 +445,7 @@ class WireFormatInvalidInputTest : public testing::Test { io::StringOutputStream raw_output(&result); io::CodedOutputStream output(&raw_output); - EXPECT_TRUE(WireFormat::WriteString( + EXPECT_TRUE(WireFormat::WriteBytes( field->number(), string(bytes, size), &output)); } @@ -541,6 +565,130 @@ TEST_F(WireFormatInvalidInputTest, InvalidStringInUnknownGroup) { EXPECT_FALSE(WireFormat::SkipMessage(&coded_input, &unknown_fields)); } +// Test differences between string and bytes. +// Value of a string type must be valid UTF-8 string. 
When UTF-8 +// validation is enabled (GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED): +// WriteInvalidUTF8String: see error message. +// ReadInvalidUTF8String: see error message. +// WriteValidUTF8String: fine. +// ReadValidUTF8String: fine. +// WriteArbitraryBytes: fine. +// ReadArbitraryBytes: fine. +const char * kInvalidUTF8String = "Invalid UTF-8: \xA0\xB0\xC0\xD0"; +const char * kValidUTF8String = "Valid UTF-8: \x01\x02\u8C37\u6B4C"; + +template <typename T> +bool WriteMessage(const char *value, T *message, string *wire_buffer) { + message->set_data(value); + wire_buffer->clear(); + message->AppendToString(wire_buffer); + return (wire_buffer->size() > 0); +} + +template <typename T> +bool ReadMessage(const string &wire_buffer, T *message) { + return message->ParseFromArray(wire_buffer.data(), wire_buffer.size()); +} + +TEST(Utf8ValidationTest, WriteInvalidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + ASSERT_EQ(1, errors.size()); + EXPECT_EQ("Encountered string containing invalid UTF-8 data while " + "serializing protocol buffer. Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes.", + errors[0]); + +#else + ASSERT_EQ(0, errors.size()); +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +} + +TEST(Utf8ValidationTest, ReadInvalidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneString output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + ASSERT_EQ(1, errors.size()); + EXPECT_EQ("Encountered string containing invalid UTF-8 data while " + "parsing protocol buffer. 
Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes.", + errors[0]); + +#else + ASSERT_EQ(0, errors.size()); +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +} + +TEST(Utf8ValidationTest, WriteValidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kValidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); +} + +TEST(Utf8ValidationTest, ReadValidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + WriteMessage(kValidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneString output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); + EXPECT_EQ(input.data(), output.data()); +} + +// Bytes: anything can pass as bytes, use invalid UTF-8 string to test +TEST(Utf8ValidationTest, WriteArbitraryBytes) { + string wire_buffer; + protobuf_unittest::OneBytes input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); +} + +TEST(Utf8ValidationTest, ReadArbitraryBytes) { + string wire_buffer; + protobuf_unittest::OneBytes input; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneBytes output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); + EXPECT_EQ(input.data(), output.data()); +} + } // namespace } // namespace internal } // namespace protobuf -- cgit v1.2.3