about | summary | refs | log | tree | commit | diff | homepage
path: root/src/google/protobuf
diff options
context:
space:
mode:
author Feng Xiao <xfxyjwf@gmail.com> 2015-08-23 17:50:38 -0700
committer Feng Xiao <xfxyjwf@gmail.com> 2015-08-23 17:50:38 -0700
commit b17ec3ca11ed13cc0d984f6d8be112c246b1994d (patch)
tree c7c05b5d36fdb3c0a601a4dba0763e1e08f7bc52 /src/google/protobuf
parent eee38b0c018b3279f77d03dff796f440f40d3516 (diff)
Down-integrate from internal code base.
Diffstat (limited to 'src/google/protobuf')
-rw-r--r-- src/google/protobuf/compiler/cpp/cpp_helpers.cc 88
-rw-r--r-- src/google/protobuf/compiler/cpp/cpp_helpers.h 19
-rw-r--r-- src/google/protobuf/compiler/cpp/cpp_map_field.cc 63
-rw-r--r-- src/google/protobuf/compiler/cpp/cpp_string_field.cc 84
-rw-r--r-- src/google/protobuf/compiler/parser.cc 36
-rw-r--r-- src/google/protobuf/lite_arena_unittest.cc 83
-rw-r--r-- src/google/protobuf/wire_format.cc 56
-rw-r--r-- src/google/protobuf/wire_format.h 19
-rw-r--r-- src/google/protobuf/wire_format_lite.cc 31
-rw-r--r-- src/google/protobuf/wire_format_lite.h 10
10 files changed, 387 insertions, 102 deletions
diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.cc b/src/google/protobuf/compiler/cpp/cpp_helpers.cc
index 678a995a..09845458 100644
--- a/src/google/protobuf/compiler/cpp/cpp_helpers.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_helpers.cc
@@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) {
descriptor->file()->name() == kAnyProtoFile;
}
+// Level of UTF-8 checking that the generated code applies to a string field.
+// The value is chosen per field by GetUtf8CheckMode() and consumed by
+// GenerateUtf8CheckCode().
+enum Utf8CheckMode {
+ STRICT = 0, // Parsing will fail if non UTF-8 data is in string fields.
+ VERIFY = 1, // Only log an error but parsing will succeed.
+ NONE = 2, // No UTF-8 check.
+};
+
+// Picks the level of UTF-8 enforcement for |field| from the file that
+// defines it: files using proto3 syntax get STRICT; any other file gets
+// VERIFY, unless it is optimized for LITE_RUNTIME, in which case it gets NONE.
+static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field) {
+  if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
+    return STRICT;
+  }
+  const bool is_lite_runtime =
+      field->file()->options().optimize_for() == FileOptions::LITE_RUNTIME;
+  return is_lite_runtime ? NONE : VERIFY;
+}
+
+// Prints to |printer| the statement that UTF-8-checks |field| in generated
+// code, according to GetUtf8CheckMode(field):
+//   NONE:   prints nothing.
+//   STRICT: prints a call to WireFormatLite::<strict_function>; when
+//           |for_parse| is true the call is wrapped in DO_(...).
+//   VERIFY: prints a call to WireFormat::<verify_function>.
+// In both printing modes |parameters| is expanded with |variables| and forms
+// the leading arguments of the call; it is followed by the PARSE or SERIALIZE
+// operation (selected by |for_parse|) and the quoted full name of the field.
+static void GenerateUtf8CheckCode(const FieldDescriptor* field,
+                                  bool for_parse,
+                                  const map<string, string>& variables,
+                                  const char* parameters,
+                                  const char* strict_function,
+                                  const char* verify_function,
+                                  io::Printer* printer) {
+  const Utf8CheckMode mode = GetUtf8CheckMode(field);
+  if (mode == NONE) {
+    return;
+  }
+
+  if (mode == STRICT) {
+    if (for_parse) {
+      printer->Print("DO_(");
+    }
+    printer->Print(
+        "::google::protobuf::internal::WireFormatLite::$function$(\n",
+        "function", strict_function);
+    // Everything after the first line of the call is printed one level deeper.
+    printer->Indent();
+    printer->Print(variables, parameters);
+    printer->Print(
+        for_parse
+            ? "::google::protobuf::internal::WireFormatLite::PARSE,\n"
+            : "::google::protobuf::internal::WireFormatLite::SERIALIZE,\n");
+    printer->Print("\"$full_name$\")", "full_name", field->full_name());
+    if (for_parse) {
+      // Closes the DO_( opened above.
+      printer->Print(")");
+    }
+    printer->Print(";\n");
+    printer->Outdent();
+    return;
+  }
+
+  if (mode == VERIFY) {
+    printer->Print(
+        "::google::protobuf::internal::WireFormat::$function$(\n",
+        "function", verify_function);
+    printer->Indent();
+    printer->Print(variables, parameters);
+    printer->Print(
+        for_parse
+            ? "::google::protobuf::internal::WireFormat::PARSE,\n"
+            : "::google::protobuf::internal::WireFormat::SERIALIZE,\n");
+    printer->Print("\"$full_name$\");\n", "full_name", field->full_name());
+    printer->Outdent();
+  }
+}
+
+// String flavour of GenerateUtf8CheckCode(): selects the string verification
+// functions and forwards every other argument unchanged.
+void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
+                                    bool for_parse,
+                                    const map<string, string>& variables,
+                                    const char* parameters,
+                                    io::Printer* printer) {
+  const char* const strict_function = "VerifyUtf8String";
+  const char* const verify_function = "VerifyUTF8StringNamedField";
+  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+                        strict_function, verify_function, printer);
+}
+
+// Cord flavour of GenerateUtf8CheckCode(): selects the Cord verification
+// functions and forwards every other argument unchanged.
+void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
+                                  bool for_parse,
+                                  const map<string, string>& variables,
+                                  const char* parameters,
+                                  io::Printer* printer) {
+  const char* const strict_function = "VerifyUtf8Cord";
+  const char* const verify_function = "VerifyUTF8CordNamedField";
+  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+                        strict_function, verify_function, printer);
+}
+
} // namespace cpp
} // namespace compiler
} // namespace protobuf
diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.h b/src/google/protobuf/compiler/cpp/cpp_helpers.h
index 29c1f90b..985cb04c 100644
--- a/src/google/protobuf/compiler/cpp/cpp_helpers.h
+++ b/src/google/protobuf/compiler/cpp/cpp_helpers.h
@@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) {
file->options().cc_generic_services();
}
-// Should string fields in this file verify that their contents are UTF-8?
-inline bool HasUtf8Verification(const FileDescriptor* file) {
- return file->options().optimize_for() != FileOptions::LITE_RUNTIME;
-}
-
// Should we generate a separate, super-optimized code path for serializing to
// flat arrays? We don't do this in Lite mode because we'd rather reduce code
// size.
@@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) {
bool IsAnyMessage(const FileDescriptor* descriptor);
bool IsAnyMessage(const Descriptor* descriptor);
+// Prints to |printer| the generated-code statement that UTF-8-checks a string
+// field, or prints nothing when no check applies to |field|.
+// for_parse: true emits the PARSE form of the check, false the SERIALIZE
+// form.
+// parameters: printer template, expanded with |variables|, that supplies the
+// leading arguments of the emitted call.
+void GenerateUtf8CheckCodeForString(
+ const FieldDescriptor* field,
+ bool for_parse,
+ const map<string, string>& variables,
+ const char* parameters,
+ io::Printer* printer);
+
+// Same as GenerateUtf8CheckCodeForString(), except that the emitted call
+// names the Cord verification functions (VerifyUtf8Cord /
+// VerifyUTF8CordNamedField) instead of the string ones.
+void GenerateUtf8CheckCodeForCord(
+ const FieldDescriptor* field,
+ bool for_parse,
+ const map<string, string>& variables,
+ const char* parameters,
+ io::Printer* printer);
+
} // namespace cpp
} // namespace compiler
} // namespace protobuf
diff --git a/src/google/protobuf/compiler/cpp/cpp_map_field.cc b/src/google/protobuf/compiler/cpp/cpp_map_field.cc
index a14d8986..25acc61b 100644
--- a/src/google/protobuf/compiler/cpp/cpp_map_field.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_map_field.cc
@@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"}\n");
}
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, true, variables_,
+ "entry->key().data(), entry->key().length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, true, variables_,
+ "entry->mutable_value()->data(),\n"
+ "entry->mutable_value()->length(),\n", printer);
+ }
+
// If entry is allocated by arena, its desctructor should be avoided.
if (SupportsArenas(descriptor_)) {
printer->Print(variables_,
@@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_,
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n"
- " $number$, *entry, output);\n"
+ " $number$, *entry, output);\n");
+
+ printer->Indent();
+ printer->Indent();
+
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ const FieldDescriptor* value_field =
+ descriptor_->message_type()->FindFieldByName("value");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, false, variables_,
+ "it->first.data(), it->first.length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, false, variables_,
+ "it->second.data(), it->second.length(),\n", printer);
+ }
+
+ printer->Outdent();
+ printer->Outdent();
+
+ printer->Print(
" }\n");
// If entry is allocated by arena, its desctructor should be avoided.
@@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$NoVirtualToArray(\n"
- " $number$, *entry, target);\n"
+ " $number$, *entry, target);\n");
+
+ printer->Indent();
+ printer->Indent();
+
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ const FieldDescriptor* value_field =
+ descriptor_->message_type()->FindFieldByName("value");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, false, variables_,
+ "it->first.data(), it->first.length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, false, variables_,
+ "it->second.data(), it->second.length(),\n", printer);
+ }
+
+ printer->Outdent();
+ printer->Outdent();
+ printer->Print(
" }\n");
// If entry is allocated by arena, its desctructor should be avoided.
diff --git a/src/google/protobuf/compiler/cpp/cpp_string_field.cc b/src/google/protobuf/compiler/cpp/cpp_string_field.cc
index d1af6dda..6b0821a6 100644
--- a/src/google/protobuf/compiler/cpp/cpp_string_field.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_string_field.cc
@@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
}
void StringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const {
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
printer->Print(variables_,
"::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n"
@@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
void StringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
printer->Print(variables_,
"target =\n"
@@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
}
@@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
printer->Print(variables_,
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->add_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(this->$name$_size() - 1).data(),\n"
- " this->$name$(this->$name$_size() - 1).length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$(this->$name$_size() - 1).data(),\n"
+ "this->$name$(this->$name$_size() - 1).length(),\n",
+ printer);
}
}
@@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(i).data(), this->$name$(i).length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ printer->Indent();
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
}
+ printer->Outdent();
printer->Print(variables_,
" ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n"
" $number$, this->$name$(i), output);\n"
@@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- " ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(i).data(), this->$name$(i).length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ printer->Indent();
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
}
+ printer->Outdent();
printer->Print(variables_,
" target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$ToArray($number$, this->$name$(i), target);\n"
diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc
index 895ff34a..4d018425 100644
--- a/src/google/protobuf/compiler/parser.cc
+++ b/src/google/protobuf/compiler/parser.cc
@@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field,
} else {
value_field->set_type_name(map_field.value_type_name);
}
+ // Propagate the "enforce_utf8" option to key and value fields if they
+ // are strings. This helps simplify the implementation of code generators
+ // and also reflection-based parsing code.
+ //
+ // The following definition:
+ // message Foo {
+ // map<string, string> value = 1 [enforce_utf8 = false];
+ // }
+ // will be interpreted as:
+ // message Foo {
+ // message ValueEntry {
+ // option map_entry = true;
+ // string key = 1 [enforce_utf8 = false];
+ // string value = 2 [enforce_utf8 = false];
+ // }
+ // repeated ValueEntry value = 1 [enforce_utf8 = false];
+ // }
+ //
+ // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
+ // from protocol compiler.
+ for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
+ const UninterpretedOption& option =
+ field->options().uninterpreted_option(i);
+ if (option.name_size() == 1 &&
+ option.name(0).name_part() == "enforce_utf8" &&
+ !option.name(0).is_extension()) {
+ if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
+ key_field->mutable_options()->add_uninterpreted_option()
+ ->CopyFrom(option);
+ }
+ if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
+ value_field->mutable_options()->add_uninterpreted_option()
+ ->CopyFrom(option);
+ }
+ }
+ }
}
bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
diff --git a/src/google/protobuf/lite_arena_unittest.cc b/src/google/protobuf/lite_arena_unittest.cc
new file mode 100644
index 00000000..f0bee880
--- /dev/null
+++ b/src/google/protobuf/lite_arena_unittest.cc
@@ -0,0 +1,83 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/arena_test_util.h>
+#include <google/protobuf/map_lite_test_util.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace {
+
+// Round-trips a TestArenaMapLite through SerializeToString/ParseFromString
+// with both the source and the destination message created on one arena, then
+// checks the parsed map fields. The heap checker itself is still disabled
+// (see the TODO below), so this currently only exercises the round trip.
+TEST(LiteArenaTest, MapNoHeapAllocation) {
+ // Allocate a large initial block to avoid mallocs during hooked test.
+ std::vector<char> arena_block(128 * 1024);
+ google::protobuf::ArenaOptions options;
+ options.initial_block = &arena_block[0];
+ options.initial_block_size = arena_block.size();
+ google::protobuf::Arena arena(options);
+ string data;
+ // Pre-size the serialization buffer too -- presumably so that writing into
+ // it would not allocate once the heap check is enabled.
+ data.reserve(128 * 1024);
+
+ {
+ // TODO(teboring): Enable no heap check when ArenaStringPtr is used in
+ // Map.
+ // google::protobuf::internal::NoHeapChecker no_heap;
+
+ // Populate a message on the arena and serialize it into |data|.
+ protobuf_unittest::TestArenaMapLite* from =
+ google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
+ google::protobuf::MapLiteTestUtil::SetArenaMapFields(from);
+ from->SerializeToString(&data);
+
+ // Parse the bytes back into a second arena message and verify its fields.
+ protobuf_unittest::TestArenaMapLite* to =
+ google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
+ to->ParseFromString(data);
+ google::protobuf::MapLiteTestUtil::ExpectArenaMapFieldsSet(*to);
+ }
+}
+
+// Parses a hand-built length-delimited field into an arena-allocated
+// ForeignMessageArenaLite. Nothing is asserted here; judging by the test
+// name, a leak checker is expected to flag any memory that parsing an unknown
+// field leaves behind -- TODO confirm.
+TEST(LiteArenaTest, UnknownFieldMemLeak) {
+ google::protobuf::Arena arena;
+ protobuf_unittest::ForeignMessageArenaLite* message =
+ google::protobuf::Arena::CreateMessage<protobuf_unittest::ForeignMessageArenaLite>(
+ &arena);
+ // NOTE: constructing a string from this literal stops at the first NUL, so
+ // |data| starts out as the single byte 0x0A (the wire-format tag for field 1,
+ // length-delimited). The byte at index 1 is supplied by the appends below.
+ string data = "\012\000";
+ int original_capacity = data.capacity();
+ // Grow the string until its capacity exceeds the initial capacity --
+ // presumably to make the payload large enough to need a fresh allocation.
+ while (data.capacity() <= original_capacity) {
+ data.append("a");
+ }
+ // Overwrite the second byte with the number of bytes that follow it, turning
+ // the buffer into: tag, length, payload.
+ data[1] = data.size() - 2;
+ message->ParseFromString(data);
+}
+
+} // namespace
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc
index 8fbd116b..5ee4e25d 100644
--- a/src/google/protobuf/wire_format.cc
+++ b/src/google/protobuf/wire_format.cc
@@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
}
}
+// Returns true when |field| is defined in a file that uses proto3 syntax;
+// callers in this file use the strict UTF-8 check for such fields.
+static bool StrictUtf8Check(const FieldDescriptor* field) {
+  const FileDescriptor* const file = field->file();
+  const bool is_proto3 = file->syntax() == FileDescriptor::SYNTAX_PROTO3;
+  return is_proto3;
+}
+
bool WireFormat::ParseAndMergeField(
uint32 tag,
const FieldDescriptor* field, // May be NULL for unknown
@@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField(
// Handle strings separately so that we can optimize the ctype=CORD case.
case FieldDescriptor::TYPE_STRING: {
+ bool strict_utf8_check = StrictUtf8Check(field);
string value;
if (!WireFormatLite::ReadString(input, &value)) return false;
- VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
- field->name().c_str());
+ if (strict_utf8_check) {
+ if (!WireFormatLite::VerifyUtf8String(
+ value.data(), value.length(), WireFormatLite::PARSE,
+ field->full_name().c_str())) {
+ return false;
+ }
+ } else {
+ VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
+ field->full_name().c_str());
+ }
if (field->is_repeated()) {
message_reflection->AddString(message, field, value);
} else {
@@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes(
// Handle strings separately so that we can get string references
// instead of copying.
case FieldDescriptor::TYPE_STRING: {
+ bool strict_utf8_check = StrictUtf8Check(field);
string scratch;
const string& value = field->is_repeated() ?
message_reflection->GetRepeatedStringReference(
message, field, j, &scratch) :
message_reflection->GetStringReference(message, field, &scratch);
- VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
- field->name().c_str());
+ if (strict_utf8_check) {
+ WireFormatLite::VerifyUtf8String(value.data(), value.length(),
+ WireFormatLite::SERIALIZE,
+ field->full_name().c_str());
+ } else {
+ VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
+ field->full_name().c_str());
+ }
WireFormatLite::WriteString(field->number(), value, output);
break;
}
@@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize(
return our_size;
}
-void WireFormat::VerifyUTF8StringFallback(const char* data,
- int size,
- Operation op,
- const char* field_name) {
- if (!IsStructurallyValidUTF8(data, size)) {
- const char* operation_str = NULL;
- switch (op) {
- case PARSE:
- operation_str = "parsing";
- break;
- case SERIALIZE:
- operation_str = "serializing";
- break;
- // no default case: have the compiler warn if a case is not covered.
- }
- string quoted_field_name = "";
- if (field_name != NULL) {
- quoted_field_name = StringPrintf(" '%s'", field_name);
- }
- // no space below to avoid double space when the field name is missing.
- GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
- << "UTF-8 data when " << operation_str << " a protocol "
- << "buffer. Use the 'bytes' type if you intend to send raw "
- << "bytes. ";
- }
-}
-
-
} // namespace internal
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h
index 84270fee..941be75b 100644
--- a/src/google/protobuf/wire_format.h
+++ b/src/google/protobuf/wire_format.h
@@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat {
const Message& message);
enum Operation {
- PARSE,
- SERIALIZE,
+ PARSE = 0,
+ SERIALIZE = 1,
};
// Verifies that a string field is valid UTF8, logging an error if not.
@@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
const char* field_name);
private:
- // Verifies that a string field is valid UTF8, logging an error if not.
- static void VerifyUTF8StringFallback(
- const char* data,
- int size,
- Operation op,
- const char* field_name);
-
// Skip a MessageSet field.
static bool SkipMessageSetField(io::CodedInputStream* input,
uint32 field_number,
@@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
Message* message,
io::CodedInputStream* input);
-
-
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
};
@@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
inline void WireFormat::VerifyUTF8String(const char* data, int size,
WireFormat::Operation op) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
- WireFormat::VerifyUTF8StringFallback(data, size, op, NULL);
+ WireFormatLite::VerifyUtf8String(
+ data, size, static_cast<WireFormatLite::Operation>(op), NULL);
#else
// Avoid the compiler warning about unsued variables.
(void)data; (void)size; (void)op;
@@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField(
const char* data, int size, WireFormat::Operation op,
const char* field_name) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
- WireFormat::VerifyUTF8StringFallback(data, size, op, field_name);
+ WireFormatLite::VerifyUtf8String(
+ data, size, static_cast<WireFormatLite::Operation>(op), field_name);
#endif
}
diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc
index dade41af..847e3500 100644
--- a/src/google/protobuf/wire_format_lite.cc
+++ b/src/google/protobuf/wire_format_lite.cc
@@ -39,10 +39,12 @@
#include <vector>
#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stringprintf.h>
#include <google/protobuf/io/coded_stream_inl.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
namespace google {
namespace protobuf {
namespace internal {
@@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) {
return ReadBytesToString(input, *p);
}
+// Checks whether the |size| bytes at |data| are structurally valid UTF-8.
+// Returns true if they are. Otherwise logs an error that names |field_name|
+// (skipped when it is NULL) and the operation |op| that was in progress, and
+// returns false.
+bool WireFormatLite::VerifyUtf8String(const char* data,
+                                      int size,
+                                      Operation op,
+                                      const char* field_name) {
+  if (IsStructurallyValidUTF8(data, size)) {
+    return true;
+  }
+
+  const char* verb = NULL;
+  switch (op) {
+    case PARSE:
+      verb = "parsing";
+      break;
+    case SERIALIZE:
+      verb = "serializing";
+      break;
+    // Deliberately no default case, so that the compiler can warn about an
+    // Operation value that is not handled here.
+  }
+
+  // The quoted name brings its own leading space, so the message has no
+  // double space when the field name is missing.
+  const string name_part =
+      (field_name != NULL) ? StringPrintf(" '%s'", field_name) : string();
+  GOOGLE_LOG(ERROR) << "String field" << name_part
+                    << " contains invalid UTF-8 data when " << verb
+                    << " a protocol buffer. Use the 'bytes' type if you intend"
+                    << " to send raw bytes. ";
+  return false;
+}
+
} // namespace internal
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/wire_format_lite.h b/src/google/protobuf/wire_format_lite.h
index 19fbc2c5..55fc7ecd 100644
--- a/src/google/protobuf/wire_format_lite.h
+++ b/src/google/protobuf/wire_format_lite.h
@@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite {
static bool ReadBytes(input, string** p);
+ // The operation during which a string is being verified. VerifyUtf8String()
+ // uses it only to word its error message ("parsing" / "serializing").
+ enum Operation {
+ PARSE = 0,
+ SERIALIZE = 1,
+ };
+
+ // Returns true if the data is valid UTF-8.
+ // Otherwise logs an error and returns false. |op| and |field_name| are used
+ // only to build that log message; |field_name| may be NULL, in which case
+ // the message omits the field name.
+ static bool VerifyUtf8String(const char* data, int size,
+ Operation op,
+ const char* field_name);
+
static inline bool ReadGroup (field_number, input, MessageLite* value);
static inline bool ReadMessage(input, MessageLite* value);