From c703061d4993904d1fb42e9ae88034112eceb261 Mon Sep 17 00:00:00 2001
From: Yilun Chong
Date: Tue, 10 Apr 2018 14:32:28 -0700
Subject: Add gogo benchmark

---
 benchmarks/util/gogo_data_scrubber.cc          | 143 +++++++++++++++++++++++
 benchmarks/util/protoc-gen-gogoproto.cc        | 148 ++++++++++++++++++++++++
 benchmarks/util/schema_proto2_to_proto3_util.h | 154 +++++++++++++++++++++++++
 3 files changed, 445 insertions(+)
 create mode 100644 benchmarks/util/gogo_data_scrubber.cc
 create mode 100644 benchmarks/util/protoc-gen-gogoproto.cc
 create mode 100644 benchmarks/util/schema_proto2_to_proto3_util.h

(limited to 'benchmarks/util')

diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc
new file mode 100644
index 00000000..fb9af6e2
--- /dev/null
+++ b/benchmarks/util/gogo_data_scrubber.cc
@@ -0,0 +1,143 @@
+// Rewrites benchmark dataset files, removing every group field and all unknown
+// fields from each payload message.
+//
+// Usage: <input_files...> <output_files...>, where the k-th input file is
+// written to the k-th output file.
+
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.h"
+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <vector>
+
+using google::protobuf::FieldDescriptor;
+using google::protobuf::Message;
+using google::protobuf::Reflection;
+
+
+// Recursively removes group fields and unknown fields from a message.
+class DataGroupStripper {
+ public:
+  // Clears every set field of type GROUP in |message|, recurses into every set
+  // singular or repeated sub-message, and finally drops the unknown fields.
+  static void StripMessage(Message *message) {
+    std::vector<const FieldDescriptor*> set_fields;
+    const Reflection* reflection = message->GetReflection();
+    reflection->ListFields(*message, &set_fields);
+
+    for (size_t i = 0; i < set_fields.size(); i++) {
+      const FieldDescriptor* field = set_fields[i];
+      if (field->type() == FieldDescriptor::TYPE_GROUP) {
+        reflection->ClearField(message, field);
+      } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
+        if (field->is_repeated()) {
+          const int count = reflection->FieldSize(*message, field);
+          for (int j = 0; j < count; j++) {
+            StripMessage(reflection->MutableRepeatedMessage(message, field, j));
+          }
+        } else {
+          StripMessage(reflection->MutableMessage(message, field));
+        }
+      }
+    }
+
+    reflection->MutableUnknownFields(message)->Clear();
+  }
+};
+
+namespace {
+
+// Creates an empty message of the generated class named |message_name|, or
+// returns null when the name is not one of the benchmark message types.
+std::unique_ptr<Message> NewMessage(const std::string& message_name) {
+  std::unique_ptr<Message> message;
+  if (message_name == "benchmarks.proto3.GoogleMessage1") {
+    message.reset(new benchmarks::proto3::GoogleMessage1);
+  } else if (message_name == "benchmarks.proto2.GoogleMessage1") {
+    message.reset(new benchmarks::proto2::GoogleMessage1);
+  } else if (message_name == "benchmarks.proto2.GoogleMessage2") {
+    message.reset(new benchmarks::proto2::GoogleMessage2);
+  } else if (message_name == "benchmarks.google_message3.GoogleMessage3") {
+    message.reset(new benchmarks::google_message3::GoogleMessage3);
+  } else if (message_name == "benchmarks.google_message4.GoogleMessage4") {
+    message.reset(new benchmarks::google_message4::GoogleMessage4);
+  }
+  return message;
+}
+
+}  // namespace
+
+// Returns the whole content of the file |name|. Fails a GOOGLE_CHECK when the
+// file cannot be opened.
+std::string ReadFile(const std::string& name) {
+  // Datasets are serialized protos: read them as raw bytes.
+  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
+  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
+      << name
+      << "', please make sure you are running this command from the benchmarks"
+      << " directory.\n";
+  return std::string((std::istreambuf_iterator<char>(file)),
+                     std::istreambuf_iterator<char>());
+}
+
+// Strips every dataset named in the first half of the arguments and writes the
+// result to the matching file name in the second half.
+int main(int argc, char *argv[]) {
+  if (argc % 2 == 0 || argc == 1) {
+    std::cerr << "Usage: [input_files] [output_file_names] where " <<
+        "input_files are one to one mapping to output_file_names." <<
+        std::endl;
+    return 1;
+  }
+
+  const int pair_count = argc / 2;
+  for (int i = pair_count; i > 0; i--) {
+    const std::string input_file = argv[i];
+    const std::string output_file = argv[i + pair_count];
+
+    std::cerr << "Generating " << input_file
+        << " to " << output_file << std::endl;
+    benchmarks::BenchmarkDataset dataset;
+    const std::string dataset_payload = ReadFile(input_file);
+    GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
+        << "Can't parse data file " << input_file;
+
+    // Owned by a smart pointer so the message is freed on every iteration.
+    const std::unique_ptr<Message> message =
+        NewMessage(dataset.message_name());
+    if (message == nullptr) {
+      std::cerr << "Unknown message type: " << dataset.message_name()
+          << std::endl;
+      return 1;
+    }
+
+    for (int j = 0; j < dataset.payload_size(); j++) {
+      if (!message->ParseFromString(dataset.payload(j))) {
+        std::cerr << "Warning: payload " << j << " of " << input_file
+            << " did not parse cleanly" << std::endl;
+      }
+      DataGroupStripper::StripMessage(message.get());
+      dataset.set_payload(j, message->SerializeAsString());
+    }
+
+    std::ofstream ofs(output_file.c_str(), std::ios::out | std::ios::binary);
+    GOOGLE_CHECK(ofs.is_open()) << "Couldn't open output file '"
+        << output_file << "'";
+    ofs << dataset.SerializeAsString();
+    ofs.close();
+  }
+
+
+  return 0;
+}
diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc
new file mode 100644
index 00000000..bfa6a5e5
--- /dev/null
+++ b/benchmarks/util/protoc-gen-gogoproto.cc
@@ -0,0 +1,148 @@
+// protoc plugin that writes each input file back out as a .proto file after
+// SchemaGroupStripper and SchemaAddZeroEnumValue have rewritten its descriptor.
+
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/printer.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "schema_proto2_to_proto3_util.h"
+
+#include "google/protobuf/compiler/plugin.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FileDescriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::io::Printer;
+using google::protobuf::util::SchemaGroupStripper;
+using google::protobuf::util::SchemaAddZeroEnumValue;
+
+namespace google {
+namespace protobuf {
+namespace compiler {
+
+namespace {
+
+// Returns true when |text| ends with |suffix|.
+bool HasSuffix(const std::string& text, const std::string& suffix) {
+  return text.size() >= suffix.size() &&
+         text.compare(text.size() - suffix.size(), suffix.size(), suffix) == 0;
+}
+
+// Returns |filename| without a trailing ".protodevel" or ".proto" extension.
+// A name that carries neither extension, including one shorter than the
+// extensions themselves, is returned unchanged.
+std::string StripProto(const std::string& filename) {
+  const std::string protodevel_extension = ".protodevel";
+  const std::string proto_extension = ".proto";
+  if (HasSuffix(filename, protodevel_extension)) {
+    return filename.substr(0, filename.size() - protodevel_extension.size());
+  }
+  if (HasSuffix(filename, proto_extension)) {
+    return filename.substr(0, filename.size() - proto_extension.size());
+  }
+  return filename;
+}
+
+// Holds every file built by Generate() so far; GenerateAll() consults it to
+// decide which file can be emitted next.
+DescriptorPool new_pool_;
+
+// Returns true when |file| is not in new_pool_ yet while all of its
+// dependencies (regular, public and weak) already are.
+bool CanGenerate(const FileDescriptor* file) {
+  if (new_pool_.FindFileByName(file->name()) != nullptr) {
+    return false;
+  }
+  for (int j = 0; j < file->dependency_count(); j++) {
+    if (new_pool_.FindFileByName(file->dependency(j)->name()) == nullptr) {
+      return false;
+    }
+  }
+  for (int j = 0; j < file->public_dependency_count(); j++) {
+    if (new_pool_.FindFileByName(
+            file->public_dependency(j)->name()) == nullptr) {
+      return false;
+    }
+  }
+  for (int j = 0; j < file->weak_dependency_count(); j++) {
+    if (new_pool_.FindFileByName(
+            file->weak_dependency(j)->name()) == nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+class GoGoProtoGenerator : public CodeGenerator {
+ public:
+  // Emits |files| one per pass: each pass generates the first file for which
+  // CanGenerate() holds, so a file is only written after its dependencies.
+  // Files whose dependencies never enter new_pool_ are not emitted. Returns
+  // false as soon as Generate() fails for a file.
+  bool GenerateAll(const std::vector<const FileDescriptor*>& files,
+                   const std::string& parameter,
+                   GeneratorContext* context,
+                   std::string* error) const override {
+    for (size_t pass = 0; pass < files.size(); pass++) {
+      for (const FileDescriptor* file : files) {
+        if (!CanGenerate(file)) {
+          continue;
+        }
+        if (!Generate(file, parameter, context, error)) {
+          return false;
+        }
+        break;
+      }
+    }
+
+    return true;
+  }
+
+  // Rewrites the descriptor of |file|, adds the result to new_pool_ and writes
+  // its DebugString() to "<name without extension>.proto" through |context|.
+  // |parameter| is not used. Returns false and sets |error| when the rewritten
+  // descriptor cannot be built.
+  bool Generate(const FileDescriptor* file,
+                const std::string& parameter,
+                GeneratorContext* context,
+                std::string* error) const override {
+    FileDescriptorProto new_file;
+    file->CopyTo(&new_file);
+    SchemaGroupStripper::StripFile(file, &new_file);
+
+    SchemaAddZeroEnumValue enum_scrubber;
+    enum_scrubber.ScrubFile(&new_file);
+
+    const std::string filename(file->name());
+    // BuildFile() returns null when the rewritten descriptor is invalid.
+    const FileDescriptor* built_file = new_pool_.BuildFile(new_file);
+    if (built_file == nullptr) {
+      *error = "Could not build the rewritten descriptor of " + filename;
+      return false;
+    }
+
+    std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
+        context->Open(StripProto(filename) + ".proto"));
+    const std::string content = built_file->DebugString();
+    Printer printer(output.get(), '$');
+    printer.WriteRaw(content.c_str(), content.size());
+
+    return true;
+  }
+};
+
+}  // namespace compiler
+}  // namespace protobuf
+}  // namespace google
+
+int main(int argc, char* argv[]) {
+  google::protobuf::compiler::GoGoProtoGenerator generator;
+  return google::protobuf::compiler::PluginMain(argc, argv, &generator);
+}
diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h
new file mode 100644
index 00000000..089012dd
--- /dev/null
+++ b/benchmarks/util/schema_proto2_to_proto3_util.h
@@ -0,0 +1,154 @@
+#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
+#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
+
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+
+#include <sstream>
+#include <algorithm>
+#include <string>
+
+using google::protobuf::Descriptor;
+using google::protobuf::DescriptorProto;
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FieldDescriptorProto;
+using google::protobuf::Message;
+using google::protobuf::EnumValueDescriptorProto;
+
+namespace google {
+namespace protobuf {
+namespace util {
+
+// Removes group fields and MessageSet-related definitions from the proto form
+// of a file descriptor.
+class SchemaGroupStripper {
+
+ public:
+  // |file| is expected to be the proto copy of |old_file|, so that entry i of
+  // one describes entry i of the other. Deletes top-level messages that use the
+  // MessageSet wire format, strips the remaining ones, and deletes top-level
+  // extensions that are groups or whose message type or extended type is a
+  // MessageSet.
+  static void StripFile(const FileDescriptor* old_file,
+                        FileDescriptorProto *file) {
+    // Both loops run backwards: deleting entry i then leaves the entries below
+    // i at the same index as in |old_file|.
+    for (int i = file->message_type_size() - 1; i >= 0; i--) {
+      if (IsMessageSet(old_file->message_type(i))) {
+        file->mutable_message_type()->DeleteSubrange(i, 1);
+        continue;
+      }
+      StripMessage(old_file->message_type(i), file->mutable_message_type(i));
+    }
+    for (int i = file->extension_size() - 1; i >= 0; i--) {
+      const FieldDescriptor* field = old_file->extension(i);
+      if (field->type() == FieldDescriptor::TYPE_GROUP ||
+          IsMessageSet(field->message_type()) ||
+          IsMessageSet(field->containing_type())) {
+        file->mutable_extension()->DeleteSubrange(i, 1);
+      }
+    }
+  }
+
+ private:
+  // Returns true for a non-null descriptor with message_set_wire_format set.
+  static bool IsMessageSet(const Descriptor *descriptor) {
+    return descriptor != nullptr &&
+        descriptor->options().message_set_wire_format();
+  }
+
+  // Deletes from |new_message| the fields and extensions that are groups or
+  // that refer to a MessageSet, then does the same for every nested type.
+  // |old_message| is the descriptor |new_message| was copied from.
+  static void StripMessage(const Descriptor *old_message,
+                           DescriptorProto *new_message) {
+    for (int i = new_message->field_size() - 1; i >= 0; i--) {
+      if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP ||
+          IsMessageSet(old_message->field(i)->message_type())) {
+        new_message->mutable_field()->DeleteSubrange(i, 1);
+      }
+    }
+    for (int i = new_message->extension_size() - 1; i >= 0; i--) {
+      const FieldDescriptor* extension = old_message->extension(i);
+      if (extension->type() == FieldDescriptor::TYPE_GROUP ||
+          IsMessageSet(extension->containing_type()) ||
+          IsMessageSet(extension->message_type())) {
+        new_message->mutable_extension()->DeleteSubrange(i, 1);
+      }
+    }
+    for (int i = 0; i < new_message->nested_type_size(); i++) {
+      StripMessage(old_message->nested_type(i),
+                   new_message->mutable_nested_type(i));
+    }
+  }
+
+};
+
+// Reorders or extends enums so that the first value of every enum is zero.
+class SchemaAddZeroEnumValue {
+
+ public:
+  SchemaAddZeroEnumValue()
+      : total_added_(0) {
+  }
+
+  // Scrubs every enum declared in |file|, at top level or inside a message.
+  void ScrubFile(FileDescriptorProto *file) {
+    for (int i = 0; i < file->enum_type_size(); i++) {
+      ScrubEnum(file->mutable_enum_type(i));
+    }
+    for (int i = 0; i < file->message_type_size(); i++) {
+      ScrubMessage(file->mutable_message_type(i));
+    }
+  }
+
+ private:
+  // Makes the first value of |enum_type| the one numbered 0. An existing zero
+  // value trades places with the first value; when there is none, a new value
+  // named "ADDED_ZERO_VALUE_<n>" is inserted in front of the existing values.
+  void ScrubEnum(EnumDescriptorProto *enum_type) {
+    const int value_count = enum_type->value_size();
+    if (value_count > 0 && enum_type->value(0).number() == 0) {
+      return;
+    }
+
+    for (int j = 1; j < value_count; j++) {
+      if (enum_type->value(j).number() == 0) {
+        enum_type->mutable_value()->SwapElements(0, j);
+        return;
+      }
+    }
+
+    // No zero value: append an empty one and swap it down to index 0, which
+    // moves every existing value up by one without copying it. The new value
+    // starts empty, so it carries no options of the former first value.
+    enum_type->mutable_value()->Add();
+    for (int i = enum_type->value_size() - 1; i > 0; i--) {
+      enum_type->mutable_value()->SwapElements(i, i - 1);
+    }
+    EnumValueDescriptorProto* zero_value = enum_type->mutable_value(0);
+    zero_value->set_number(0);
+    zero_value->set_name("ADDED_ZERO_VALUE_" +
+                         std::to_string(total_added_++));
+  }
+
+  // Scrubs the enums nested in |message_type| and in its nested messages.
+  void ScrubMessage(DescriptorProto *message_type) {
+    for (int i = 0; i < message_type->enum_type_size(); i++) {
+      ScrubEnum(message_type->mutable_enum_type(i));
+    }
+    for (int i = 0; i < message_type->nested_type_size(); i++) {
+      ScrubMessage(message_type->mutable_nested_type(i));
+    }
+  }
+
+  // Number of zero values added so far; used to number the added names.
+  int total_added_;
+};
+
+}  // namespace util
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
-- 
cgit v1.2.3