diff options
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/Makefile.am | 71 | ||||
-rw-r--r-- | benchmarks/README.md | 2 | ||||
-rwxr-xr-x | benchmarks/java/pom.xml | 5 | ||||
-rwxr-xr-x | benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java | 32 | ||||
-rwxr-xr-x | benchmarks/python/py_benchmark.py | 17 | ||||
-rwxr-xr-x | benchmarks/util/big_query_utils.py | 27 | ||||
-rw-r--r-- | benchmarks/util/data_proto2_to_proto3_util.h | 64 | ||||
-rw-r--r-- | benchmarks/util/gogo_data_scrubber.cc | 39 | ||||
-rw-r--r-- | benchmarks/util/proto3_data_stripper.cc | 74 | ||||
-rw-r--r-- | benchmarks/util/protoc-gen-gogoproto.cc | 4 | ||||
-rw-r--r-- | benchmarks/util/protoc-gen-proto2_to_proto3.cc | 115 | ||||
-rwxr-xr-x | benchmarks/util/run_and_upload.py | 14 | ||||
-rw-r--r-- | benchmarks/util/schema_proto2_to_proto3_util.h | 61 |
13 files changed, 413 insertions, 112 deletions
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 564fdab3..9f609228 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -126,8 +126,10 @@ java_benchmark_testing_files = \ java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java javac_middleman: $(java_benchmark_testing_files) protoc_middleman protoc_middleman2 - cp -r $(srcdir)/java tmp && cd tmp/java && mvn clean compile assembly:single - cd ../.. + cp -r $(srcdir)/java tmp + mkdir -p tmp/java/lib + cp $(top_srcdir)/java/core/target/*.jar tmp/java/lib/protobuf-java.jar + cd tmp/java && mvn clean compile assembly:single -Dprotobuf.version=$(PACKAGE_VERSION) && cd ../.. @touch javac_middleman java-benchmark: javac_middleman @@ -137,10 +139,10 @@ java-benchmark: javac_middleman @echo 'conf=()' >> java-benchmark @echo 'data_files=""' >> java-benchmark @echo 'for arg in $$@; do if [[ $${arg:0:1} == "-" ]]; then conf+=($$arg); else data_files+="$$arg,"; fi; done' >> java-benchmark - @echo 'java -cp '"tmp/java/target/*.jar"' com.google.caliper.runner.CaliperMain com.google.protobuf.ProtoCaliperBenchmark -i runtime '"\\" >> java-benchmark - @echo '-b serializeToByteString,serializeToByteArray,serializeToMemoryStream,'"\\" >> java-benchmark - @echo 'deserializeFromByteString,deserializeFromByteArray,deserializeFromMemoryStream '"\\" >> java-benchmark - @echo '-DdataFile=$${data_files:0:-1} $${conf[*]}' >> java-benchmark + @echo 'java -cp '\"tmp/java/target/*:$(top_srcdir)/java/core/target/*:$(top_srcdir)/java/util/target/*\"" \\" >>java-benchmark + @echo ' com.google.caliper.runner.CaliperMain com.google.protobuf.ProtoCaliperBenchmark -i runtime '"\\" >> java-benchmark + @echo ' -b serializeToByteArray,serializeToMemoryStream,deserializeFromByteArray,deserializeFromMemoryStream '"\\" >> java-benchmark + @echo ' -DdataFile=$${data_files:0:-1} $${conf[*]}' >> java-benchmark @chmod +x java-benchmark java: protoc_middleman protoc_middleman2 java-benchmark @@ -179,8 +181,8 @@ nodist_libbenchmark_messages_la_SOURCES = \ python-pure-python-benchmark: python_add_init @echo "Writing shortcut script python-pure-python-benchmark..." @echo '#! /bin/bash' > python-pure-python-benchmark - @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark - @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark + @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-pure-python-benchmark + @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-pure-python-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark @echo cp $(srcdir)/python/py_benchmark.py tmp >> python-pure-python-benchmark @echo python tmp/py_benchmark.py '$$@' >> python-pure-python-benchmark @@ -189,8 +191,8 @@ python-pure-python-benchmark: python_add_init python-cpp-reflection-benchmark: python_add_init @echo "Writing shortcut script python-cpp-reflection-benchmark..." @echo '#! /bin/bash' > python-cpp-reflection-benchmark - @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark - @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark + @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-cpp-reflection-benchmark + @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-cpp-reflection-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark @echo cp $(srcdir)/python/py_benchmark.py tmp >> python-cpp-reflection-benchmark @echo python tmp/py_benchmark.py '$$@' >> python-cpp-reflection-benchmark @@ -199,8 +201,8 @@ python-cpp-reflection-benchmark: python_add_init python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la @echo "Writing shortcut script python-cpp-generated-code-benchmark..." @echo '#! /bin/bash' > python-cpp-generated-code-benchmark - @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark - @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark + @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-cpp-generated-code-benchmark + @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/.libs >> python-cpp-generated-code-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark @echo cp $(srcdir)/python/py_benchmark.py tmp >> python-cpp-generated-code-benchmark @echo python tmp/py_benchmark.py --cpp_generated '$$@' >> python-cpp-generated-code-benchmark @@ -347,11 +349,11 @@ gogo_proto_middleman: protoc-gen-gogoproto oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-gogoproto --gogoproto_out=$$oldpwd/tmp/gogo_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) ) touch gogo_proto_middleman -new_data = $$(for data in $(all_data); do echo "tmp$${data\#$(srcdir)}"; done | xargs) +gogo_data = $$(for data in $(all_data); do echo "tmp/gogo_data$${data\#$(srcdir)}"; done | xargs) generate_gogo_data: protoc_middleman protoc_middleman2 gogo-data-scrubber - mkdir -p `dirname $(new_data)` - ./gogo-data-scrubber $(all_data) $(new_data) + mkdir -p `dirname $(gogo_data)` + ./gogo-data-scrubber $(all_data) $(gogo_data) touch generate_gogo_data make_tmp_dir_gogo: @@ -406,8 +408,6 @@ gogoslick_protoc_middleman: make_tmp_dir_gogo $(top_srcdir)/src/protoc$(EXEEXT) oldpwd=`pwd` && ( cd $(srcdir)/tmp/gogo_proto && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$$oldpwd/$(top_srcdir)/src --gogoslick_out=$$oldpwd/tmp/gogoslick $(benchmarks_protoc_inputs_proto2_message4) ) touch gogoslick_protoc_middleman -gogo_data = $$(find . -type f -name "dataset.*.pb" -path "./tmp/*") - generate-gogo-benchmark-code: @echo '#! /bin/bash' > generate-gogo-benchmark-code @echo 'cp $(srcdir)/go/go_benchmark_test.go tmp/$$1/benchmark_code/$$1_benchmark1_test.go' >> generate-gogo-benchmark-code @@ -451,7 +451,7 @@ gogoslick: gogoslick_protoc_middleman generate_gogo_data gogo-benchmark generat ############ UTIL RULES BEGIN ############ -bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber +bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber protoc-gen-proto2_to_proto3 proto3-data-stripper protoc_gen_gogoproto_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la protoc_gen_gogoproto_SOURCES = util/protoc-gen-gogoproto.cc @@ -466,9 +466,40 @@ nodist_gogo_data_scrubber_SOURCES = \ $(benchmarks_protoc_outputs_proto2) \ $(benchmarks_protoc_outputs_proto2_header) \ $(benchmarks_protoc_outputs_header) + +protoc_gen_proto2_to_proto3_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la +protoc_gen_proto2_to_proto3_SOURCES = util/protoc-gen-proto2_to_proto3.cc +protoc_gen_proto2_to_proto3_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util + +proto3_data_stripper_LDADD = $(top_srcdir)/src/libprotobuf.la +proto3_data_stripper_SOURCES = util/proto3_data_stripper.cc +proto3_data_stripper_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util +util/proto3_data_stripper-proto3_data_stripper.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) +nodist_proto3_data_stripper_SOURCES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) \ + $(benchmarks_protoc_outputs_proto2_header) \ + $(benchmarks_protoc_outputs_header) + ############ UTIL RULES END ############ +############ PROTO3 PREPARATION BEGIN ############# + +proto3_proto_middleman: protoc-gen-proto2_to_proto3 + mkdir -p "tmp/proto3_proto" + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-proto2_to_proto3 --proto2_to_proto3_out=$$oldpwd/tmp/proto3_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) ) + touch proto3_proto_middleman + +proto3_data = $$(for data in $(all_data); do echo "tmp/proto3_data$${data\#$(srcdir)}"; done | xargs) + +generate_proto3_data: protoc_middleman protoc_middleman2 proto3-data-stripper + mkdir -p `dirname $(proto3_data)` + ./proto3-data-stripper $(all_data) $(proto3_data) + touch generate_proto3_data + +############ PROTO3 PREPARATION END ############# + MAINTAINERCLEANFILES = \ Makefile.in @@ -511,7 +542,9 @@ CLEANFILES = \ gogoslick_protoc_middleman \ gogoslick \ gogo-benchmark \ - gogo/cpp_no_group/cpp_benchmark.* + gogo/cpp_no_group/cpp_benchmark.* \ + proto3_proto_middleman \ + generate_proto3_data clean-local: diff --git a/benchmarks/README.md b/benchmarks/README.md index 21cd7352..ae5c7ddd 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -5,7 +5,7 @@ This directory contains benchmarking schemas and data sets that you can use to test a variety of performance scenarios against your protobuf language runtime. If you are looking for performance numbers of officially support languages, see [here]( -https://github.com/google/protobuf/blob/master/docs/Performance.md) +https://github.com/google/protobuf/blob/master/docs/performance.md) ## Prerequisite diff --git a/benchmarks/java/pom.xml b/benchmarks/java/pom.xml index c2cd78a1..570bd664 100755 --- a/benchmarks/java/pom.xml +++ b/benchmarks/java/pom.xml @@ -14,7 +14,10 @@ <dependency> <groupId>com.google.protobuf</groupId> <artifactId>protobuf-java</artifactId> - <version>3.5.0</version> + <version>${protobuf.version}</version> + <type>jar</type> + <scope>system</scope> + <systemPath>${project.basedir}/lib/protobuf-java.jar</systemPath> </dependency> <dependency> <groupId>com.google.caliper</groupId> diff --git a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java index 94568aea..c766d74e 100755 --- a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java +++ b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java @@ -5,6 +5,7 @@ import com.google.caliper.BeforeExperiment; import com.google.caliper.AfterExperiment; import com.google.caliper.Benchmark; import com.google.caliper.Param; +import com.google.caliper.api.VmOptions; import com.google.protobuf.ByteString; import com.google.protobuf.CodedOutputStream; import com.google.protobuf.ExtensionRegistry; @@ -22,6 +23,12 @@ import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.List; +// Caliper set CICompilerCount to 1 for making sure compilation doesn't run in parallel with itself, +// This makes TieredCompilation not working. We just disable TieredCompilation by default. In master +// branch this has been disabled by default in caliper: +// https://github.com/google/caliper/blob/master/caliper-runner/src/main/java/com/google/caliper/runner/target/Jvm.java#L38:14 +// But this haven't been added into most recent release. +@VmOptions("-XX:-TieredCompilation") public class ProtoCaliperBenchmark { public enum BenchmarkMessageType { GOOGLE_MESSAGE1_PROTO3 { @@ -152,18 +159,6 @@ public class ProtoCaliperBenchmark { @Benchmark - void serializeToByteString(int reps) throws IOException { - if (sampleMessageList.size() == 0) { - return; - } - for (int i = 0; i < reps; i++) { - for (int j = 0; j < sampleMessageList.size(); j++) { - sampleMessageList.get(j).toByteString(); - } - } - } - - @Benchmark void serializeToByteArray(int reps) throws IOException { if (sampleMessageList.size() == 0) { return; @@ -189,19 +184,6 @@ public class ProtoCaliperBenchmark { } @Benchmark - void deserializeFromByteString(int reps) throws IOException { - if (inputStringList.size() == 0) { - return; - } - for (int i = 0; i < reps; i++) { - for (int j = 0; j < inputStringList.size(); j++) { - benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputStringList.get(j), extensions); - } - } - } - - @Benchmark void deserializeFromByteArray(int reps) throws IOException { if (inputDataList.size() == 0) { return; diff --git a/benchmarks/python/py_benchmark.py b/benchmarks/python/py_benchmark.py index 6942d208..e86b61e7 100755 --- a/benchmarks/python/py_benchmark.py +++ b/benchmarks/python/py_benchmark.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys import os import timeit @@ -138,15 +139,15 @@ if __name__ == "__main__": results.append(run_one_test(file)) if args.json != "no": - print json.dumps(results) + print(json.dumps(results)) else: for result in results: - print "Message %s of dataset file %s" % \ - (result["message_name"], result["filename"]) - print "Average time for parse_from_benchmark: %.2f ns" % \ + print("Message %s of dataset file %s" % \ + (result["message_name"], result["filename"])) + print("Average time for parse_from_benchmark: %.2f ns" % \ (result["benchmarks"][ \ - args.behavior_prefix + "_parse_from_benchmark"]) - print "Average time for serialize_to_benchmark: %.2f ns" % \ + args.behavior_prefix + "_parse_from_benchmark"])) + print("Average time for serialize_to_benchmark: %.2f ns" % \ (result["benchmarks"][ \ - args.behavior_prefix + "_serialize_to_benchmark"]) - print "" + args.behavior_prefix + "_serialize_to_benchmark"])) + print("") diff --git a/benchmarks/util/big_query_utils.py b/benchmarks/util/big_query_utils.py index 14105aa6..aea55bbd 100755 --- a/benchmarks/util/big_query_utils.py +++ b/benchmarks/util/big_query_utils.py @@ -1,5 +1,6 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import argparse import json import uuid @@ -37,11 +38,11 @@ def create_dataset(biq_query, project_id, dataset_id): dataset_req.execute(num_retries=NUM_RETRIES) except HttpError as http_error: if http_error.resp.status == 409: - print 'Warning: The dataset %s already exists' % dataset_id + print('Warning: The dataset %s already exists' % dataset_id) else: # Note: For more debugging info, print "http_error.content" - print 'Error in creating dataset: %s. Err: %s' % (dataset_id, - http_error) + print('Error in creating dataset: %s. Err: %s' % (dataset_id, + http_error)) is_success = False return is_success @@ -109,13 +110,13 @@ def create_table2(big_query, table_req = big_query.tables().insert( projectId=project_id, datasetId=dataset_id, body=body) res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully created %s "%s"' % (res['kind'], res['id']) + print('Successfully created %s "%s"' % (res['kind'], res['id'])) except HttpError as http_error: if http_error.resp.status == 409: - print 'Warning: Table %s already exists' % table_id + print('Warning: Table %s already exists' % table_id) else: - print 'Error in creating table: %s. Err: %s' % (table_id, - http_error) + print('Error in creating table: %s. Err: %s' % (table_id, + http_error)) is_success = False return is_success @@ -141,9 +142,9 @@ def patch_table(big_query, project_id, dataset_id, table_id, fields_schema): tableId=table_id, body=body) res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully patched %s "%s"' % (res['kind'], res['id']) + print('Successfully patched %s "%s"' % (res['kind'], res['id'])) except HttpError as http_error: - print 'Error in creating table: %s. Err: %s' % (table_id, http_error) + print('Error in creating table: %s. Err: %s' % (table_id, http_error)) is_success = False return is_success @@ -159,10 +160,10 @@ def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): body=body) res = insert_req.execute(num_retries=NUM_RETRIES) if res.get('insertErrors', None): - print 'Error inserting rows! Response: %s' % res + print('Error inserting rows! Response: %s' % res) is_success = False except HttpError as http_error: - print 'Error inserting rows to the table %s' % table_id + print('Error inserting rows to the table %s' % table_id) is_success = False return is_success @@ -176,8 +177,8 @@ def sync_query_job(big_query, project_id, query, timeout=5000): projectId=project_id, body=query_data).execute(num_retries=NUM_RETRIES) except HttpError as http_error: - print 'Query execute job failed with error: %s' % http_error - print http_error.content + print('Query execute job failed with error: %s' % http_error) + print(http_error.content) return query_job diff --git a/benchmarks/util/data_proto2_to_proto3_util.h b/benchmarks/util/data_proto2_to_proto3_util.h new file mode 100644 index 00000000..5eea8509 --- /dev/null +++ b/benchmarks/util/data_proto2_to_proto3_util.h @@ -0,0 +1,64 @@ +#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ +#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ + +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.h" + +using google::protobuf::FieldDescriptor; +using google::protobuf::Message; +using google::protobuf::Reflection; + +namespace google { +namespace protobuf { +namespace util { + +class DataStripper { + public: + void StripMessage(Message *message) { + std::vector<const FieldDescriptor*> set_fields; + const Reflection* reflection = message->GetReflection(); + reflection->ListFields(*message, &set_fields); + + for (size_t i = 0; i < set_fields.size(); i++) { + const FieldDescriptor* field = set_fields[i]; + if (ShouldBeClear(field)) { + reflection->ClearField(message, field); + continue; + } + if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + if (field->is_repeated()) { + for (int j = 0; j < reflection->FieldSize(*message, field); j++) { + StripMessage(reflection->MutableRepeatedMessage(message, field, j)); + } + } else { + StripMessage(reflection->MutableMessage(message, field)); + } + } + } + + reflection->MutableUnknownFields(message)->Clear(); + } + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) = 0; +}; + +class GogoDataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP; + } +}; + +class Proto3DataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP || + field->is_extension(); + } +}; + +} // namespace util +} // namespace protobuf +} // namespace google + +#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc index fb9af6e2..9ef57b0d 100644 --- a/benchmarks/util/gogo_data_scrubber.cc +++ b/benchmarks/util/gogo_data_scrubber.cc @@ -4,43 +4,11 @@ #include "datasets/google_message2/benchmark_message2.pb.h" #include "datasets/google_message3/benchmark_message3.pb.h" #include "datasets/google_message4/benchmark_message4.pb.h" - -#include "google/protobuf/message.h" -#include "google/protobuf/descriptor.h" +#include "data_proto2_to_proto3_util.h" #include <fstream> -using google::protobuf::FieldDescriptor; -using google::protobuf::Message; -using google::protobuf::Reflection; - - -class DataGroupStripper { - public: - static void StripMessage(Message *message) { - std::vector<const FieldDescriptor*> set_fields; - const Reflection* reflection = message->GetReflection(); - reflection->ListFields(*message, &set_fields); - - for (size_t i = 0; i < set_fields.size(); i++) { - const FieldDescriptor* field = set_fields[i]; - if (field->type() == FieldDescriptor::TYPE_GROUP) { - reflection->ClearField(message, field); - } - if (field->type() == FieldDescriptor::TYPE_MESSAGE) { - if (field->is_repeated()) { - for (int j = 0; j < reflection->FieldSize(*message, field); j++) { - StripMessage(reflection->MutableRepeatedMessage(message, field, j)); - } - } else { - StripMessage(reflection->MutableMessage(message, field)); - } - } - } - - reflection->MutableUnknownFields(message)->Clear(); - } -}; +using google::protobuf::util::GogoDataStripper; std::string ReadFile(const std::string& name) { std::ifstream file(name.c_str()); @@ -91,7 +59,8 @@ int main(int argc, char *argv[]) { for (int i = 0; i < dataset.payload_size(); i++) { message->ParseFromString(dataset.payload(i)); - DataGroupStripper::StripMessage(message); + GogoDataStripper stripper; + stripper.StripMessage(message); dataset.set_payload(i, message->SerializeAsString()); } diff --git a/benchmarks/util/proto3_data_stripper.cc b/benchmarks/util/proto3_data_stripper.cc new file mode 100644 index 00000000..3096c4c1 --- /dev/null +++ b/benchmarks/util/proto3_data_stripper.cc @@ -0,0 +1,74 @@ +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" +#include "data_proto2_to_proto3_util.h" + +#include <fstream> + +using google::protobuf::util::Proto3DataStripper; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" + << name + << "', please make sure you are running this command from the benchmarks" + << " directory.\n"; + return std::string((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); +} + +int main(int argc, char *argv[]) { + if (argc % 2 == 0 || argc == 1) { + std::cerr << "Usage: [input_files] [output_file_names] where " << + "input_files are one to one mapping to output_file_names." << + std::endl; + return 1; + } + + for (int i = argc / 2; i > 0; i--) { + const std::string &input_file = argv[i]; + const std::string &output_file = argv[i + argc / 2]; + + std::cerr << "Generating " << input_file + << " to " << output_file << std::endl; + benchmarks::BenchmarkDataset dataset; + Message* message; + std::string dataset_payload = ReadFile(input_file); + GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) + << "Can' t parse data file " << input_file; + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + message = new benchmarks::proto3::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + message = new benchmarks::proto2::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + message = new benchmarks::proto2::GoogleMessage2; + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + message = new benchmarks::google_message3::GoogleMessage3; + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + message = new benchmarks::google_message4::GoogleMessage4; + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } + + for (int i = 0; i < dataset.payload_size(); i++) { + message->ParseFromString(dataset.payload(i)); + Proto3DataStripper stripper; + stripper.StripMessage(message); + dataset.set_payload(i, message->SerializeAsString()); + } + + std::ofstream ofs(output_file); + ofs << dataset.SerializeAsString(); + ofs.close(); + } + + + return 0; +} diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc index bfa6a5e5..9c1b3d04 100644 --- a/benchmarks/util/protoc-gen-gogoproto.cc +++ b/benchmarks/util/protoc-gen-gogoproto.cc @@ -12,7 +12,7 @@ using google::protobuf::FileDescriptor; using google::protobuf::DescriptorPool; using google::protobuf::io::Printer; using google::protobuf::util::SchemaGroupStripper; -using google::protobuf::util::SchemaAddZeroEnumValue; +using google::protobuf::util::EnumScrubber; namespace google { namespace protobuf { @@ -74,7 +74,7 @@ class GoGoProtoGenerator : public CodeGenerator { file->CopyTo(&new_file); SchemaGroupStripper::StripFile(file, &new_file); - SchemaAddZeroEnumValue enum_scrubber; + EnumScrubber enum_scrubber; enum_scrubber.ScrubFile(&new_file); string filename = file->name(); diff --git a/benchmarks/util/protoc-gen-proto2_to_proto3.cc b/benchmarks/util/protoc-gen-proto2_to_proto3.cc new file mode 100644 index 00000000..d0a89023 --- /dev/null +++ b/benchmarks/util/protoc-gen-proto2_to_proto3.cc @@ -0,0 +1,115 @@ +#include "google/protobuf/compiler/code_generator.h" +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/io/printer.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "schema_proto2_to_proto3_util.h" + +#include "google/protobuf/compiler/plugin.h" + +using google::protobuf::FileDescriptorProto; +using google::protobuf::FileDescriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::io::Printer; +using google::protobuf::util::SchemaGroupStripper; +using google::protobuf::util::EnumScrubber; +using google::protobuf::util::ExtensionStripper; +using google::protobuf::util::FieldScrubber; + +namespace google { +namespace protobuf { +namespace compiler { + +namespace { + +string StripProto(string filename) { + return filename.substr(0, filename.rfind(".proto")); +} + +DescriptorPool* GetPool() { + static DescriptorPool *pool = new DescriptorPool(); + return pool; +} + +} // namespace + +class Proto2ToProto3Generator final : public CodeGenerator { + public: + bool GenerateAll(const std::vector<const FileDescriptor*>& files, + const string& parameter, + GeneratorContext* context, + string* error) const { + for (int i = 0; i < files.size(); i++) { + for (auto file : files) { + if (CanGenerate(file)) { + Generate(file, parameter, context, error); + break; + } + } + } + + return true; + } + + bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + FileDescriptorProto new_file; + file->CopyTo(&new_file); + SchemaGroupStripper::StripFile(file, &new_file); + + EnumScrubber enum_scrubber; + enum_scrubber.ScrubFile(&new_file); + ExtensionStripper::StripFile(&new_file); + FieldScrubber::ScrubFile(&new_file); + new_file.set_syntax("proto3"); + + string filename = file->name(); + string basename = StripProto(filename); + + std::vector<std::pair<string,string>> option_pairs; + ParseGeneratorParameter(parameter, &option_pairs); + + std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output( + context->Open(basename + ".proto")); + string content = GetPool()->BuildFile(new_file)->DebugString(); + Printer printer(output.get(), '$'); + printer.WriteRaw(content.c_str(), content.size()); + + return true; + } + private: + bool CanGenerate(const FileDescriptor* file) const { + if (GetPool()->FindFileByName(file->name()) != nullptr) { + return false; + } + for (int j = 0; j < file->dependency_count(); j++) { + if (GetPool()->FindFileByName(file->dependency(j)->name()) == nullptr) { + return false; + } + } + for (int j = 0; j < file->public_dependency_count(); j++) { + if (GetPool()->FindFileByName( + file->public_dependency(j)->name()) == nullptr) { + return false; + } + } + for (int j = 0; j < file->weak_dependency_count(); j++) { + if (GetPool()->FindFileByName( + file->weak_dependency(j)->name()) == nullptr) { + return false; + } + } + return true; + } +}; + +} // namespace compiler +} // namespace protobuf +} // namespace google + +int main(int argc, char* argv[]) { + google::protobuf::compiler::Proto2ToProto3Generator generator; + return google::protobuf::compiler::PluginMain(argc, argv, &generator); +} diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py index ae22a668..43c9fa2d 100755 --- a/benchmarks/util/run_and_upload.py +++ b/benchmarks/util/run_and_upload.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from __future__ import absolute_import import argparse import os import re @@ -5,7 +7,7 @@ import copy import uuid import calendar import time -import big_query_utils +from . import big_query_utils import datetime import json # This import depends on the automake rule protoc_middleman, please make sure @@ -255,7 +257,7 @@ def upload_result(result_list, metadata): if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET, _TABLE + "$" + _NOW, [row]): - print 'Error when uploading result', new_result + print('Error when uploading result', new_result) if __name__ == "__main__": @@ -280,11 +282,11 @@ if __name__ == "__main__": parse_go_result(args.go_input_file) metadata = get_metadata() - print "uploading cpp results..." + print("uploading cpp results...") upload_result(cpp_result, metadata) - print "uploading java results..." + print("uploading java results...") upload_result(java_result, metadata) - print "uploading python results..." + print("uploading python results...") upload_result(python_result, metadata) - print "uploading go results..." + print("uploading go results...") upload_result(go_result, metadata) diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h index 089012dd..0079f6f1 100644 --- a/benchmarks/util/schema_proto2_to_proto3_util.h +++ b/benchmarks/util/schema_proto2_to_proto3_util.h @@ -74,10 +74,10 @@ class SchemaGroupStripper { }; -class SchemaAddZeroEnumValue { +class EnumScrubber { public: - SchemaAddZeroEnumValue() + EnumScrubber() : total_added_(0) { } @@ -130,6 +130,63 @@ class SchemaAddZeroEnumValue { int total_added_; }; +class ExtensionStripper { + public: + static void StripFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + StripMessage(file->mutable_message_type(i)); + } + file->mutable_extension()->Clear(); + } + private: + static void StripMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + message_type->clear_extension_range(); + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + StripMessage(message_type->mutable_nested_type(i)); + } + } +}; + + +class FieldScrubber { + public: + static void ScrubFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + ScrubMessage(file->mutable_message_type(i)); + } + for (int i = 0; i < file->mutable_extension()->size(); i++) { + file->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(file->mutable_extension(i))) { + file->mutable_extension(i)->clear_label(); + } + } + } + private: + static bool ShouldClearLabel(const FieldDescriptorProto *field) { + return field->label() == FieldDescriptorProto::LABEL_REQUIRED; + } + + static void ScrubMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + for (int i = 0; i < message_type->mutable_extension()->size(); i++) { + message_type->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_extension(i))) { + message_type->mutable_extension(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_field()->size(); i++) { + message_type->mutable_field(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_field(i))) { + message_type->mutable_field(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + ScrubMessage(message_type->mutable_nested_type(i)); + } + } +}; + } // namespace util } // namespace protobuf } // namespace google |