aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/wire_format_unittest.cc
diff options
context:
space:
mode:
authorGravatar kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2008-11-21 00:06:27 +0000
committerGravatar kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2008-11-21 00:06:27 +0000
commit26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
treed35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_unittest.cc
parenta2a32c20434807e9966e3f48375f9419134d1b55 (diff)
Integrate changes from internal code.
protoc * Enum values may now have custom options, using syntax similar to field options. * Fixed bug where .proto files which use custom options but don't actually define them (i.e. they import another .proto file defining the options) had to explicitly import descriptor.proto. * Adjacent string literals in .proto files will now be concatenated, like in C. C++ * Generated message classes now have a Swap() method which efficiently swaps the contents of two objects. * All message classes now have a SpaceUsed() method which returns an estimate of the number of bytes of allocated memory currently owned by the object. This is particularly useful when you are reusing a single message object to improve performance but want to make sure it doesn't bloat up too large. * New method Message::SerializeAsString() returns a string containing the serialized data. May be more convenient than calling SerializeToString(string*). * In debug mode, log error messages when string-type fields are found to contain bytes that are not valid UTF-8. * Fixed bug where a message with multiple extension ranges couldn't parse extensions. * Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on a message that contained no fields (but possibly contained extensions). * Fixed ShortDebugString() to not be O(n^2). Durr. * Fixed crash in TextFormat parsing if the first token in the input caused a tokenization error. Java * New overload of mergeFrom() which parses a slice of a byte array instead of the whole thing. * New method ByteString.asReadOnlyByteBuffer() does what it sounds like. * Improved performance of isInitialized() when optimizing for code size. Python * Corrected ListFields() signature in Message base class to match what subclasses actually implement. * Some minor refactoring.
Diffstat (limited to 'src/google/protobuf/wire_format_unittest.cc')
-rw-r--r--src/google/protobuf/wire_format_unittest.cc150
1 files changed, 149 insertions, 1 deletions
diff --git a/src/google/protobuf/wire_format_unittest.cc b/src/google/protobuf/wire_format_unittest.cc
index 6e3d4745..43dccd1a 100644
--- a/src/google/protobuf/wire_format_unittest.cc
+++ b/src/google/protobuf/wire_format_unittest.cc
@@ -199,6 +199,30 @@ TEST(WireFormatTest, SerializeFieldsAndExtensions) {
TestUtil::ExpectAllFieldsAndExtensionsInOrder(generated_data);
}
+// Round-trips a TestFieldOrderings message (a type with multiple extension
+// ranges) through serialization, then parses it back twice: once with the
+// message's own parser and once through WireFormat's reflection-based parser.
+TEST(WireFormatTest, ParseMultipleExtensionRanges) {
+  unittest::TestFieldOrderings original;
+  TestUtil::SetAllFieldsAndExtensions(&original);
+
+  string serialized;
+  original.SerializeToString(&serialized);
+
+  // Parse with the message's own ParseFromString().
+  {
+    unittest::TestFieldOrderings parsed;
+    EXPECT_TRUE(parsed.ParseFromString(serialized));
+    EXPECT_EQ(original.DebugString(), parsed.DebugString());
+  }
+
+  // Parse the same bytes again using reflection-based parsing.
+  {
+    unittest::TestFieldOrderings parsed;
+    io::ArrayInputStream array_stream(serialized.data(), serialized.size());
+    io::CodedInputStream decoder(&array_stream);
+    EXPECT_TRUE(WireFormat::ParseAndMergePartial(&decoder, &parsed));
+    EXPECT_EQ(original.DebugString(), parsed.DebugString());
+  }
+}
+
const int kUnknownTypeId = 1550055;
TEST(WireFormatTest, SerializeMessageSet) {
@@ -421,7 +445,7 @@ class WireFormatInvalidInputTest : public testing::Test {
io::StringOutputStream raw_output(&result);
io::CodedOutputStream output(&raw_output);
- EXPECT_TRUE(WireFormat::WriteString(
+ EXPECT_TRUE(WireFormat::WriteBytes(
field->number(), string(bytes, size), &output));
}
@@ -541,6 +565,130 @@ TEST_F(WireFormatInvalidInputTest, InvalidStringInUnknownGroup) {
EXPECT_FALSE(WireFormat::SkipMessage(&coded_input, &unknown_fields));
}
+// Test differences between string and bytes.
+// The value of a string field must be valid UTF-8. When UTF-8
+// validation is enabled (GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED):
+// WriteInvalidUTF8String: an error message is logged.
+// ReadInvalidUTF8String: an error message is logged.
+// WriteValidUTF8String: fine.
+// ReadValidUTF8String: fine.
+// WriteArbitraryBytes: fine.
+// ReadArbitraryBytes: fine.
+// Fixture payloads for the UTF-8 validation tests.
+// The trailing bytes 0xA0 0xB0 0xC0 0xD0 are not a well-formed UTF-8 sequence
+// (0xA0 is a continuation byte with no lead byte before it).
+const char * kInvalidUTF8String = "Invalid UTF-8: \xA0\xB0\xC0\xD0";
+// U+8C37 and U+6B4C are written as explicit UTF-8 byte sequences instead of
+// \u escapes. A \u escape in a narrow string literal is converted to the
+// compiler's execution character set, which is not guaranteed to be UTF-8,
+// so the escape form could yield a string that is not valid UTF-8 at all.
+const char * kValidUTF8String =
+    "Valid UTF-8: \x01\x02\xE8\xB0\xB7\xE6\xAD\x8C";
+
+// Sets the `data` field of |message| to |value|, empties |wire_buffer|, and
+// appends the serialized message to it. Returns true if the buffer ended up
+// non-empty.
+template<typename T>
+bool WriteMessage(const char *value, T *message, string *wire_buffer) {
+  message->set_data(value);
+
+  // Start from an empty buffer so only this message's bytes are present.
+  wire_buffer->clear();
+  message->AppendToString(wire_buffer);
+
+  return !wire_buffer->empty();
+}
+
+// Parses the bytes held in |wire_buffer| into |message| and returns whatever
+// ParseFromArray() reports.
+template<typename T>
+bool ReadMessage(const string &wire_buffer, T *message) {
+  const char *serialized_bytes = wire_buffer.data();
+  return message->ParseFromArray(serialized_bytes, wire_buffer.size());
+}
+
+// Serializing a OneString whose value is invalid UTF-8: expects exactly one
+// logged error when validation is compiled in, and none otherwise.
+TEST(Utf8ValidationTest, WriteInvalidUTF8String) {
+  protobuf_unittest::OneString message;
+  string serialized;
+  vector<string> logged_errors;
+  {
+    // The capture object lives only for the duration of the write.
+    ScopedMemoryLog capture;
+    WriteMessage(kInvalidUTF8String, &message, &serialized);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  ASSERT_EQ(1, logged_errors.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "serializing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            logged_errors[0]);
+#else
+  ASSERT_EQ(0, logged_errors.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+// Parsing a OneString whose wire data carries invalid UTF-8: expects exactly
+// one logged error when validation is compiled in, and none otherwise.
+TEST(Utf8ValidationTest, ReadInvalidUTF8String) {
+  // Build the wire data first, outside the log capture, so that only the
+  // parse step below is observed.
+  protobuf_unittest::OneString source;
+  string serialized;
+  WriteMessage(kInvalidUTF8String, &source, &serialized);
+
+  protobuf_unittest::OneString parsed;
+  vector<string> logged_errors;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  ASSERT_EQ(1, logged_errors.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "parsing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            logged_errors[0]);
+#else
+  ASSERT_EQ(0, logged_errors.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+// Serializing a OneString whose value is valid UTF-8 must log no errors.
+TEST(Utf8ValidationTest, WriteValidUTF8String) {
+  protobuf_unittest::OneString message;
+  string serialized;
+  vector<string> logged_errors;
+  {
+    // The capture object lives only for the duration of the write.
+    ScopedMemoryLog capture;
+    WriteMessage(kValidUTF8String, &message, &serialized);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, logged_errors.size());
+}
+
+// Parsing a OneString whose wire data carries valid UTF-8 must log no errors
+// and must reproduce the original field value.
+TEST(Utf8ValidationTest, ReadValidUTF8String) {
+  // Build the wire data first, outside the log capture.
+  protobuf_unittest::OneString source;
+  string serialized;
+  WriteMessage(kValidUTF8String, &source, &serialized);
+
+  protobuf_unittest::OneString parsed;
+  vector<string> logged_errors;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, logged_errors.size());
+  EXPECT_EQ(source.data(), parsed.data());
+}
+
+// A bytes field accepts any content, so the invalid UTF-8 sample is reused
+// here: serializing it in a OneBytes message must log no errors.
+TEST(Utf8ValidationTest, WriteArbitraryBytes) {
+  protobuf_unittest::OneBytes message;
+  string serialized;
+  vector<string> logged_errors;
+  {
+    // The capture object lives only for the duration of the write.
+    ScopedMemoryLog capture;
+    WriteMessage(kInvalidUTF8String, &message, &serialized);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, logged_errors.size());
+}
+
+// Parsing a OneBytes message whose payload is not valid UTF-8 must log no
+// errors and must reproduce the original field value.
+TEST(Utf8ValidationTest, ReadArbitraryBytes) {
+  // Build the wire data first, outside the log capture.
+  protobuf_unittest::OneBytes source;
+  string serialized;
+  WriteMessage(kInvalidUTF8String, &source, &serialized);
+
+  protobuf_unittest::OneBytes parsed;
+  vector<string> logged_errors;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged_errors = capture.GetMessages(ERROR);
+  }
+  ASSERT_EQ(0, logged_errors.size());
+  EXPECT_EQ(source.data(), parsed.data());
+}
+
} // namespace
} // namespace internal
} // namespace protobuf