From 193af9f4c1ebfeec4c9322fc4bbb2a513267d345 Mon Sep 17 00:00:00 2001 From: Yilun Chong Date: Wed, 30 May 2018 17:06:45 -0700 Subject: Add proto2 to proto3 util --- benchmarks/util/data_proto2_to_proto3_util.h | 63 +++++++++++++++ benchmarks/util/gogo_data_scrubber.cc | 39 +-------- benchmarks/util/proto3_data_stripper.cc | 74 +++++++++++++++++ benchmarks/util/protoc-gen-gogoproto.cc | 4 +- benchmarks/util/protoc-gen-proto2_to_proto3.cc | 108 +++++++++++++++++++++++++ benchmarks/util/schema_proto2_to_proto3_util.h | 62 +++++++++++++- 6 files changed, 311 insertions(+), 39 deletions(-) create mode 100644 benchmarks/util/data_proto2_to_proto3_util.h create mode 100644 benchmarks/util/proto3_data_stripper.cc create mode 100644 benchmarks/util/protoc-gen-proto2_to_proto3.cc (limited to 'benchmarks/util') diff --git a/benchmarks/util/data_proto2_to_proto3_util.h b/benchmarks/util/data_proto2_to_proto3_util.h new file mode 100644 index 00000000..7207efde --- /dev/null +++ b/benchmarks/util/data_proto2_to_proto3_util.h @@ -0,0 +1,63 @@ +#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ +#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ + +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.h" + +using google::protobuf::FieldDescriptor; +using google::protobuf::Message; +using google::protobuf::Reflection; + +namespace google { +namespace protobuf { +namespace util { + +class DataStripper { + public: + void StripMessage(Message *message) { + std::vector set_fields; + const Reflection* reflection = message->GetReflection(); + reflection->ListFields(*message, &set_fields); + + for (size_t i = 0; i < set_fields.size(); i++) { + const FieldDescriptor* field = set_fields[i]; + if (ShouldBeClear(field)) { + reflection->ClearField(message, field); + } + if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + if (field->is_repeated()) { + for (int j = 0; j < reflection->FieldSize(*message, field); j++) { + StripMessage(reflection->MutableRepeatedMessage(message, field, j)); + } + } else { + StripMessage(reflection->MutableMessage(message, field)); + } + } + } + + reflection->MutableUnknownFields(message)->Clear(); + } + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) = 0; +}; + +class GogoDataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP; + } +}; + +class Proto3DataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP || + field->is_extension(); + } +}; + +} // namespace util +} // namespace protobuf +} // namespace google + +#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc index fb9af6e2..9ef57b0d 100644 --- a/benchmarks/util/gogo_data_scrubber.cc +++ b/benchmarks/util/gogo_data_scrubber.cc @@ -4,43 +4,11 @@ #include "datasets/google_message2/benchmark_message2.pb.h" #include "datasets/google_message3/benchmark_message3.pb.h" #include "datasets/google_message4/benchmark_message4.pb.h" - -#include "google/protobuf/message.h" -#include "google/protobuf/descriptor.h" +#include "data_proto2_to_proto3_util.h" #include -using google::protobuf::FieldDescriptor; -using google::protobuf::Message; -using google::protobuf::Reflection; - - -class DataGroupStripper { - public: - static void StripMessage(Message *message) { - std::vector set_fields; - const Reflection* reflection = message->GetReflection(); - reflection->ListFields(*message, &set_fields); - - for (size_t i = 0; i < set_fields.size(); i++) { - const FieldDescriptor* field = set_fields[i]; - if (field->type() == FieldDescriptor::TYPE_GROUP) { - reflection->ClearField(message, field); - } - if (field->type() == FieldDescriptor::TYPE_MESSAGE) { - if (field->is_repeated()) { - for (int j = 0; j < reflection->FieldSize(*message, field); j++) { - StripMessage(reflection->MutableRepeatedMessage(message, field, j)); - } - } else { - StripMessage(reflection->MutableMessage(message, field)); - } - } - } - - reflection->MutableUnknownFields(message)->Clear(); - } -}; +using google::protobuf::util::GogoDataStripper; std::string ReadFile(const std::string& name) { std::ifstream file(name.c_str()); @@ -91,7 +59,8 @@ int main(int argc, char *argv[]) { for (int i = 0; i < dataset.payload_size(); i++) { message->ParseFromString(dataset.payload(i)); - DataGroupStripper::StripMessage(message); + GogoDataStripper stripper; + stripper.StripMessage(message); dataset.set_payload(i, message->SerializeAsString()); } diff --git a/benchmarks/util/proto3_data_stripper.cc b/benchmarks/util/proto3_data_stripper.cc new file mode 100644 index 00000000..3096c4c1 --- /dev/null +++ b/benchmarks/util/proto3_data_stripper.cc @@ -0,0 +1,74 @@ +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" +#include "data_proto2_to_proto3_util.h" + +#include + +using google::protobuf::util::Proto3DataStripper; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" + << name + << "', please make sure you are running this command from the benchmarks" + << " directory.\n"; + return std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} + +int main(int argc, char *argv[]) { + if (argc % 2 == 0 || argc == 1) { + std::cerr << "Usage: [input_files] [output_file_names] where " << + "input_files are one to one mapping to output_file_names." << + std::endl; + return 1; + } + + for (int i = argc / 2; i > 0; i--) { + const std::string &input_file = argv[i]; + const std::string &output_file = argv[i + argc / 2]; + + std::cerr << "Generating " << input_file + << " to " << output_file << std::endl; + benchmarks::BenchmarkDataset dataset; + Message* message; + std::string dataset_payload = ReadFile(input_file); + GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) + << "Can' t parse data file " << input_file; + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + message = new benchmarks::proto3::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + message = new benchmarks::proto2::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + message = new benchmarks::proto2::GoogleMessage2; + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + message = new benchmarks::google_message3::GoogleMessage3; + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + message = new benchmarks::google_message4::GoogleMessage4; + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } + + for (int i = 0; i < dataset.payload_size(); i++) { + message->ParseFromString(dataset.payload(i)); + Proto3DataStripper stripper; + stripper.StripMessage(message); + dataset.set_payload(i, message->SerializeAsString()); + } + + std::ofstream ofs(output_file); + ofs << dataset.SerializeAsString(); + ofs.close(); + } + + + return 0; +} diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc index bfa6a5e5..9c1b3d04 100644 --- a/benchmarks/util/protoc-gen-gogoproto.cc +++ b/benchmarks/util/protoc-gen-gogoproto.cc @@ -12,7 +12,7 @@ using google::protobuf::FileDescriptor; using google::protobuf::DescriptorPool; using google::protobuf::io::Printer; using google::protobuf::util::SchemaGroupStripper; -using google::protobuf::util::SchemaAddZeroEnumValue; +using google::protobuf::util::EnumScrubber; namespace google { namespace protobuf { @@ -74,7 +74,7 @@ class GoGoProtoGenerator : public CodeGenerator { file->CopyTo(&new_file); SchemaGroupStripper::StripFile(file, &new_file); - SchemaAddZeroEnumValue enum_scrubber; + EnumScrubber enum_scrubber; enum_scrubber.ScrubFile(&new_file); string filename = file->name(); diff --git a/benchmarks/util/protoc-gen-proto2_to_proto3.cc b/benchmarks/util/protoc-gen-proto2_to_proto3.cc new file mode 100644 index 00000000..b1e6ba41 --- /dev/null +++ b/benchmarks/util/protoc-gen-proto2_to_proto3.cc @@ -0,0 +1,108 @@ +#include "google/protobuf/compiler/code_generator.h" +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/io/printer.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "schema_proto2_to_proto3_util.h" + +#include "google/protobuf/compiler/plugin.h" + +using google::protobuf::FileDescriptorProto; +using google::protobuf::FileDescriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::io::Printer; +using google::protobuf::util::SchemaGroupStripper; +using google::protobuf::util::EnumScrubber; +using google::protobuf::util::ExtensionStripper; +using google::protobuf::util::FieldScrubber; + +namespace google { +namespace protobuf { +namespace compiler { + +namespace { + +string StripProto(string filename) { + if (filename.substr(filename.size() - 11) == ".protodevel") { + // .protodevel + return filename.substr(0, filename.size() - 11); + } else { + // .proto + return filename.substr(0, filename.size() - 6); + } +} + +DescriptorPool new_pool_; + +} // namespace + +class Proto2ToProto3Generator : public CodeGenerator { + public: + virtual bool GenerateAll(const std::vector& files, + const string& parameter, + GeneratorContext* context, + string* error) const { + for (int i = 0; i < files.size(); i++) { + for (auto file : files) { + bool can_generate = + (new_pool_.FindFileByName(file->name()) == nullptr); + for (int j = 0; j < file->dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->public_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->public_dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->weak_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->weak_dependency(j)->name()) != nullptr); + } + if (can_generate) { + Generate(file, parameter, context, error); + break; + } + } + } + + return true; + } + + virtual bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + FileDescriptorProto new_file; + file->CopyTo(&new_file); + SchemaGroupStripper::StripFile(file, &new_file); + + EnumScrubber enum_scrubber; + enum_scrubber.ScrubFile(&new_file); + ExtensionStripper::StripFile(&new_file); + FieldScrubber::ScrubFile(&new_file); + new_file.set_syntax("proto3"); + + string filename = file->name(); + string basename = StripProto(filename); + + std::vector> option_pairs; + ParseGeneratorParameter(parameter, &option_pairs); + + std::unique_ptr output( + context->Open(basename + ".proto")); + string content = new_pool_.BuildFile(new_file)->DebugString(); + Printer printer(output.get(), '$'); + printer.WriteRaw(content.c_str(), content.size()); + + return true; + } +}; + +} // namespace compiler +} // namespace protobuf +} // namespace google + +int main(int argc, char* argv[]) { + google::protobuf::compiler::Proto2ToProto3Generator generator; + return google::protobuf::compiler::PluginMain(argc, argv, &generator); +} diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h index 089012dd..8cbcbc40 100644 --- a/benchmarks/util/schema_proto2_to_proto3_util.h +++ b/benchmarks/util/schema_proto2_to_proto3_util.h @@ -74,10 +74,10 @@ class SchemaGroupStripper { }; -class SchemaAddZeroEnumValue { +class EnumScrubber { public: - SchemaAddZeroEnumValue() + EnumScrubber() : total_added_(0) { } @@ -130,6 +130,64 @@ class SchemaAddZeroEnumValue { int total_added_; }; +class ExtensionStripper { + public: + static void StripFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + StripMessage(file->mutable_message_type(i)); + } + file->mutable_extension()->Clear(); + } + private: + static void StripMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + message_type->clear_extension_range(); + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + StripMessage(message_type->mutable_nested_type(i)); + } + } +}; + + +class FieldScrubber { + public: + static void ScrubFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + ScrubMessage(file->mutable_message_type(i)); + } + for (int i = 0; i < file->mutable_extension()->size(); i++) { + file->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(file->mutable_extension(i))) { + file->mutable_extension(i)->clear_label(); + } + } + } + private: + static bool ShouldClearLabel(const FieldDescriptorProto *field) { + return field->label() == FieldDescriptorProto::LABEL_OPTIONAL || + field->label() == FieldDescriptorProto::LABEL_REQUIRED; + } + + static void ScrubMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + for (int i = 0; i < message_type->mutable_extension()->size(); i++) { + message_type->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_extension(i))) { + message_type->mutable_extension(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_field()->size(); i++) { + message_type->mutable_field(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_field(i))) { + message_type->mutable_field(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + ScrubMessage(message_type->mutable_nested_type(i)); + } + } +}; + } // namespace util } // namespace protobuf } // namespace google -- cgit v1.2.3