author | Yilun Chong <yilunchong@google.com> | 2018-04-10 13:26:17 -0700
---|---|---
committer | Yilun Chong <yilunchong@google.com> | 2018-04-10 13:26:17 -0700
commit | 805174eda2356df1b01752c8bc57019e696e0a75 (patch) |
tree | 97835430abc8b68b25b1b674d6a23e51ad82e7ab /benchmarks |
parent | ed4321d1cb33199984118d801956822842771e7e (diff) |
Add script to run the benchmarks and upload the results to BigQuery (bq)
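For context on what the new uploader writes, here is a minimal sketch of the BigQuery insert path used by `benchmarks/util/run_and_upload.py`. The helper names and the project/dataset/table constants come from the diff below; the row values, the behavior name, and the partition date are illustrative only, and the snippet assumes it is run from `benchmarks/util/` so that `big_query_utils` is importable.

```python
# Sketch only: mirrors upload_result() in benchmarks/util/run_and_upload.py.
# Requires the apiclient/oauth2client deps imported by big_query_utils.py
# and application-default credentials for the Cloud project.
import uuid
import big_query_utils

# Constants as defined in run_and_upload.py.
_PROJECT_ID = 'grpc-testing'
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v1'

# One parsed benchmark result; the field names match what parse_*_result()
# produces plus the metadata that upload_result() attaches. Values are made up.
result = {
    'language': 'cpp',
    'dataFileName': 'google_message1_proto3',
    'behavior': 'parse',
    'throughput': 123.4,                 # MiB/s
    'metadata': {'created': '1523404800'},
}

bq = big_query_utils.create_big_query()
row = big_query_utils.make_row(str(uuid.uuid4()), result)
# Rows land in a daily partition, e.g. opensource_result_v1$20180410.
ok = big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
                                 _TABLE + '$20180410', [row])
```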
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/Makefile.am | 114
-rw-r--r-- | benchmarks/README.md | 16
-rw-r--r-- | benchmarks/cpp/cpp_benchmark.cc (renamed from benchmarks/cpp_benchmark.cc) | 0
-rw-r--r-- | benchmarks/go/go_benchmark_test.go (renamed from benchmarks/go_benchmark_test.go) | 12
-rwxr-xr-x | benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java | 98
-rwxr-xr-x | benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java | 60
-rwxr-xr-x | benchmarks/python/py_benchmark.py (renamed from benchmarks/py_benchmark.py) | 68
-rw-r--r-- | benchmarks/python/python_benchmark_messages.cc (renamed from benchmarks/python_benchmark_messages.cc) | 4
-rw-r--r-- | benchmarks/util/__init__.py | 0
-rwxr-xr-x | benchmarks/util/big_query_utils.py | 188
-rwxr-xr-x | benchmarks/util/run_and_upload.py | 290
11 files changed, 644 insertions, 206 deletions
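Most of the diff below moves the benchmarks into per-language subdirectories (`cpp/`, `go/`, `python/`, `util/`) and switches `py_benchmark.py` to argparse with a `--json` mode. As a reference for the JSON it now emits and for how `parse_python_result()` in `run_and_upload.py` turns a timing into a throughput figure, here is a small sketch; the field names and the formula come from the diff, while the file path, the behavior prefix value, and all numbers are invented.

```python
# Shape of the --json output of python/py_benchmark.py: a list with one entry
# per data file; each "benchmarks" key is "<behavior_prefix>_<test method>"
# and its value is the average nanoseconds per call.
pure_python_results = [{
    "filename": "datasets/google_message1/proto3/dataset.google_message1_proto3.pb",
    "message_name": "benchmarks.proto3.GoogleMessage1",
    "benchmarks": {
        "pure-python_parse_from_benchmark": 15000.0,
        "pure-python_serialize_to_benchmark": 12000.0,
    },
}]

# parse_python_result() converts ns/op into MiB/s using the average
# serialized payload size of the dataset (get_data_size() reads the .pb file;
# the size here is illustrative).
avg_payload_bytes = 228.0
ns_per_op = pure_python_results[0]["benchmarks"]["pure-python_parse_from_benchmark"]
throughput_mib_s = avg_payload_bytes / ns_per_op * 1e9 / 2 ** 20
print("%.2f MiB/s" % throughput_mib_s)   # about 14.50 MiB/s for these numbers
```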
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 3de0e380..eba256f2 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -28,11 +28,11 @@ make_tmp_dir: if USE_EXTERNAL_PROTOC protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs) - $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=./cpp --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) touch protoc_middleman protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2) - $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs_proto2) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=./cpp --java_out=./tmp $(benchmarks_protoc_inputs_proto2) touch protoc_middleman2 else @@ -41,60 +41,60 @@ else # relative to srcdir, which may not be the same as the current directory when # building out-of-tree. protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) - oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) ) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd/cpp --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) ) touch protoc_middleman protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) - oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) ) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd/cpp --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) ) touch protoc_middleman2 endif -all_data = `find . -type f -name "dataset.*.pb"` +all_data = $$(find . 
-type f -name "dataset.*.pb" -not -path "./tmp/*") ############# CPP RULES ############## benchmarks_protoc_outputs = \ - benchmarks.pb.cc \ - datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc - + cpp/benchmarks.pb.cc \ + cpp/datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc + benchmarks_protoc_outputs_header = \ - benchmarks.pb.h \ - datasets/google_message1/proto3/benchmark_message1_proto3.pb.h + cpp/benchmarks.pb.h \ + cpp/datasets/google_message1/proto3/benchmark_message1_proto3.pb.h benchmarks_protoc_outputs_proto2_header = \ - datasets/google_message1/proto2/benchmark_message1_proto2.pb.h \ - datasets/google_message2/benchmark_message2.pb.h \ - datasets/google_message3/benchmark_message3.pb.h \ - datasets/google_message3/benchmark_message3_1.pb.h \ - datasets/google_message3/benchmark_message3_2.pb.h \ - datasets/google_message3/benchmark_message3_3.pb.h \ - datasets/google_message3/benchmark_message3_4.pb.h \ - datasets/google_message3/benchmark_message3_5.pb.h \ - datasets/google_message3/benchmark_message3_6.pb.h \ - datasets/google_message3/benchmark_message3_7.pb.h \ - datasets/google_message3/benchmark_message3_8.pb.h \ - datasets/google_message4/benchmark_message4.pb.h \ - datasets/google_message4/benchmark_message4_1.pb.h \ - datasets/google_message4/benchmark_message4_2.pb.h \ - datasets/google_message4/benchmark_message4_3.pb.h + cpp/datasets/google_message1/proto2/benchmark_message1_proto2.pb.h \ + cpp/datasets/google_message2/benchmark_message2.pb.h \ + cpp/datasets/google_message3/benchmark_message3.pb.h \ + cpp/datasets/google_message3/benchmark_message3_1.pb.h \ + cpp/datasets/google_message3/benchmark_message3_2.pb.h \ + cpp/datasets/google_message3/benchmark_message3_3.pb.h \ + cpp/datasets/google_message3/benchmark_message3_4.pb.h \ + cpp/datasets/google_message3/benchmark_message3_5.pb.h \ + cpp/datasets/google_message3/benchmark_message3_6.pb.h \ + cpp/datasets/google_message3/benchmark_message3_7.pb.h \ + cpp/datasets/google_message3/benchmark_message3_8.pb.h \ + cpp/datasets/google_message4/benchmark_message4.pb.h \ + cpp/datasets/google_message4/benchmark_message4_1.pb.h \ + cpp/datasets/google_message4/benchmark_message4_2.pb.h \ + cpp/datasets/google_message4/benchmark_message4_3.pb.h benchmarks_protoc_outputs_proto2 = \ - datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc \ - datasets/google_message2/benchmark_message2.pb.cc \ - datasets/google_message3/benchmark_message3.pb.cc \ - datasets/google_message3/benchmark_message3_1.pb.cc \ - datasets/google_message3/benchmark_message3_2.pb.cc \ - datasets/google_message3/benchmark_message3_3.pb.cc \ - datasets/google_message3/benchmark_message3_4.pb.cc \ - datasets/google_message3/benchmark_message3_5.pb.cc \ - datasets/google_message3/benchmark_message3_6.pb.cc \ - datasets/google_message3/benchmark_message3_7.pb.cc \ - datasets/google_message3/benchmark_message3_8.pb.cc \ - datasets/google_message4/benchmark_message4.pb.cc \ - datasets/google_message4/benchmark_message4_1.pb.cc \ - datasets/google_message4/benchmark_message4_2.pb.cc \ - datasets/google_message4/benchmark_message4_3.pb.cc + cpp/datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc \ + cpp/datasets/google_message2/benchmark_message2.pb.cc \ + cpp/datasets/google_message3/benchmark_message3.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_1.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_2.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_3.pb.cc \ + 
cpp/datasets/google_message3/benchmark_message3_4.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_5.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_6.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_7.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_8.pb.cc \ + cpp/datasets/google_message4/benchmark_message4.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_1.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_2.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_3.pb.cc $(benchmarks_protoc_outputs): protoc_middleman @@ -118,12 +118,12 @@ AM_CXXFLAGS = $(NO_OPT_CXXFLAGS) $(PROTOBUF_OPT_FLAG) -Wall -Wwrite-strings -Wov bin_PROGRAMS = cpp-benchmark cpp_benchmark_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a -cpp_benchmark_SOURCES = cpp_benchmark.cc -cpp_benchmark_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) -I$(top_srcdir)/third_party/benchmark/include +cpp_benchmark_SOURCES = cpp/cpp_benchmark.cc +cpp_benchmark_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(top_srcdir)/third_party/benchmark/include # Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough. # See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually -cpp_benchmark-cpp_benchmark.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a +cpp/cpp_benchmark-cpp_benchmark.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a nodist_cpp_benchmark_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ @@ -174,11 +174,11 @@ python_add_init: protoc_middleman protoc_middleman2 python_cpp_pkg_flags = `pkg-config --cflags --libs python` lib_LTLIBRARIES = libbenchmark_messages.la -libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc +libbenchmark_messages_la_SOURCES = python/python_benchmark_messages.cc libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic -libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags) -libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) +libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp $(python_cpp_pkg_flags) +libbenchmark_messages_la-python_benchmark_messages.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) nodist_libbenchmark_messages_la_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ @@ -191,8 +191,8 @@ python-pure-python-benchmark: python_add_init @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> 
python-pure-python-benchmark - @echo cp py_benchmark.py tmp >> python-pure-python-benchmark - @echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark + @echo cp python/py_benchmark.py tmp >> python-pure-python-benchmark + @echo python tmp/py_benchmark.py '$$@' >> python-pure-python-benchmark @chmod +x python-pure-python-benchmark python-cpp-reflection-benchmark: python_add_init @@ -201,8 +201,8 @@ python-cpp-reflection-benchmark: python_add_init @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark - @echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark - @echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark + @echo cp python/py_benchmark.py tmp >> python-cpp-reflection-benchmark + @echo python tmp/py_benchmark.py '$$@' >> python-cpp-reflection-benchmark @chmod +x python-cpp-reflection-benchmark python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la @@ -211,8 +211,8 @@ python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark - @echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark - @echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark + @echo cp python/py_benchmark.py tmp >> python-cpp-generated-code-benchmark + @echo python tmp/py_benchmark.py --cpp_generated '$$@' >> python-cpp-generated-code-benchmark @chmod +x python-cpp-generated-code-benchmark python-pure-python: python-pure-python-benchmark @@ -287,9 +287,13 @@ endif go-benchmark: go_protoc_middleman go_protoc_middleman2 @echo "Writing shortcut script go-benchmark..." @echo '#! /bin/sh' > go-benchmark - @echo 'mkdir tmp_cc && mv *.cc tmp_cc' >> go-benchmark - @echo 'go test -bench=. -- $$@' >> go-benchmark - @echo 'mv tmp_cc/* . && rm -rf tmp_cc' >> go-benchmark + @echo 'cd go' >> go-benchmark + @echo 'all_data=""' >> go-benchmark + @echo 'conf=()' >> go-benchmark + @echo 'data_files=()' >> go-benchmark + @echo 'for arg in $$@; do if [[ $${arg:0:1} == "-" ]]; then conf+=($$arg); else data_files+=("../$$arg"); fi; done' >> go-benchmark + @echo 'go test -bench=. $${conf[*]} -- $${data_files[*]}' >> go-benchmark + @echo 'cd ..' 
>> go-benchmark @chmod +x go-benchmark go: go_protoc_middleman go_protoc_middleman2 go-benchmark diff --git a/benchmarks/README.md b/benchmarks/README.md index 74c042d8..8460a18a 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -107,49 +107,51 @@ $ make python-cpp-generated-code $ make go ``` -To run a specific dataset: +To run a specific dataset or run with specific options: ### Java: ``` $ make java-benchmark -$ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)] +$ ./java-benchmark $(specific generated dataset file name) [$(caliper options)] ``` ### CPP: ``` $ make cpp-benchmark -$ ./cpp-benchmark $(specific generated dataset file name) +$ ./cpp-benchmark $(specific generated dataset file name) [$(benchmark options)] ``` ### Python: +For Python benchmark we have `--json` for outputing the json result + #### Pure Python: ``` $ make python-pure-python-benchmark -$ ./python-pure-python-benchmark $(specific generated dataset file name) +$ ./python-pure-python-benchmark [--json] $(specific generated dataset file name) ``` #### CPP reflection: ``` $ make python-cpp-reflection-benchmark -$ ./python-cpp-reflection-benchmark $(specific generated dataset file name) +$ ./python-cpp-reflection-benchmark [--json] $(specific generated dataset file name) ``` #### CPP generated code: ``` $ make python-cpp-generated-code-benchmark -$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name) +$ ./python-cpp-generated-code-benchmark [--json] $(specific generated dataset file name) ``` ### Go: ``` $ make go-benchmark -$ ./go-benchmark $(specific generated dataset file name) +$ ./go-benchmark $(specific generated dataset file name) [go testing options] ``` diff --git a/benchmarks/cpp_benchmark.cc b/benchmarks/cpp/cpp_benchmark.cc index f8b55291..f8b55291 100644 --- a/benchmarks/cpp_benchmark.cc +++ b/benchmarks/cpp/cpp_benchmark.cc diff --git a/benchmarks/go_benchmark_test.go b/benchmarks/go/go_benchmark_test.go index e747465e..8c741b71 100644 --- a/benchmarks/go_benchmark_test.go +++ b/benchmarks/go/go_benchmark_test.go @@ -1,12 +1,12 @@ package main import ( - benchmarkWrapper "./tmp" - googleMessage1Proto2 "./tmp/datasets/google_message1/proto2" - googleMessage1Proto3 "./tmp/datasets/google_message1/proto3" - googleMessage2 "./tmp/datasets/google_message2" - googleMessage3 "./tmp/datasets/google_message3" - googleMessage4 "./tmp/datasets/google_message4" + benchmarkWrapper "../tmp" + googleMessage1Proto2 "../tmp/datasets/google_message1/proto2" + googleMessage1Proto3 "../tmp/datasets/google_message1/proto3" + googleMessage2 "../tmp/datasets/google_message2" + googleMessage3 "../tmp/datasets/google_message3" + googleMessage4 "../tmp/datasets/google_message4" "flag" "github.com/golang/protobuf/proto" "io/ioutil" diff --git a/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java b/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java index f35b180a..02503905 100755 --- a/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java +++ b/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java @@ -69,40 +69,18 @@ public class ProtoBench { System.err.println("input data is in the format of \"benchmarks.proto\""); System.exit(1); } - boolean success = true; - for (int i = 0; i < args.length; i++) { - success &= runTest(args[i]); - } - System.exit(success ? 0 : 1); + + System.exit(runTest(args) ? 0 : 1); } - - /** - * Runs a single test with specific test data. 
Error messages are displayed to stderr, - * and the return value indicates general success/failure. - */ - public static boolean runTest(String file) { - byte[] inputData; - BenchmarkDataset benchmarkDataset; - try { - inputData = readAllBytes(file); - benchmarkDataset = BenchmarkDataset.parseFrom(inputData); - } catch (IOException e) { - System.err.println("Unable to get input data"); - return false; - } - List<String> argsList = getCaliperOption(benchmarkDataset); - if (argsList == null) { - System.err.println("Unable to get default message " + benchmarkDataset.getMessageName()); - return false; - } - argsList.add("-DdataFile=" + file); + public static boolean runTest(String args[]) { + List<String> argsList = getCaliperOption(args); argsList.add("com.google.protobuf.ProtoCaliperBenchmark"); try { - String args[] = new String[argsList.size()]; - argsList.toArray(args); - CaliperMain.exitlessMain(args, + String newArgs[] = new String[argsList.size()]; + argsList.toArray(newArgs); + CaliperMain.exitlessMain(newArgs, new PrintWriter(System.out, true), new PrintWriter(System.err, true)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); @@ -110,54 +88,22 @@ public class ProtoBench { e.printStackTrace(System.err); return false; } - try { - double mininumScale = 0; - // If the file not exist, this will throw IOException, which won't print the warning - // information below. - Scanner scanner = new Scanner(new String(readAllBytes("JavaBenchmarkWarning.txt"))); - while (scanner.hasNext()) { - mininumScale = Math.max(mininumScale, scanner.nextDouble()); - } - scanner.close(); - - System.out.println( - "WARNING: This benchmark's whole iterations are not enough, consider to config caliper to " - + "run for more time to make the result more convincing. You may change the configure " - + "code in com.google.protobuf.ProtoBench.getCaliperOption() of benchmark " - + benchmarkDataset.getMessageName() - + " to run for more time. e.g. Change the value of " - + "instrument.runtime.options.timingInterval or value of " - + "instrument.runtime.options.measurements to be at least " - + Math.round(mininumScale * 10 + 1) / 10.0 - + " times of before, then build and run the benchmark again\n"); - Files.deleteIfExists(Paths.get("JavaBenchmarkWarning.txt")); - } catch (IOException e) { - // The IOException here should be file not found, which means there's no warning generated by - // The benchmark, so this IOException should be discarded. - } return true; } - - private static List<String> getCaliperOption(final BenchmarkDataset benchmarkDataset) { + private static List<String> getCaliperOption(String args[]) { List<String> temp = new ArrayList<String>(); - if (benchmarkDataset.getMessageName().equals("benchmarks.proto3.GoogleMessage1")) { - } else if (benchmarkDataset.getMessageName().equals("benchmarks.proto2.GoogleMessage1")) { - } else if (benchmarkDataset.getMessageName().equals("benchmarks.proto2.GoogleMessage2")) { - } else if (benchmarkDataset.getMessageName(). - equals("benchmarks.google_message3.GoogleMessage3")) { - temp.add("-Cinstrument.runtime.options.timingInterval=3000ms"); - temp.add("-Cinstrument.runtime.options.measurements=20"); - } else if (benchmarkDataset.getMessageName(). 
- equals("benchmarks.google_message4.GoogleMessage4")) { - temp.add("-Cinstrument.runtime.options.timingInterval=1500ms"); - temp.add("-Cinstrument.runtime.options.measurements=20"); - } else { - return null; - } - temp.add("-i"); temp.add("runtime"); + String files = ""; + for (int i = 0; i < args.length; i++) { + if (args[i].charAt(0) == '-') { + temp.add(args[i]); + } else { + files += (files.equals("") ? "" : ",") + args[i]; + } + } + temp.add("-DdataFile=" + files); temp.add("-b"); String benchmarkNames = "serializeToByteString,serializeToByteArray,serializeToMemoryStream" + ",deserializeFromByteString,deserializeFromByteArray,deserializeFromMemoryStream"; @@ -165,14 +111,4 @@ public class ProtoBench { return temp; } - - public static byte[] readAllBytes(String filename) throws IOException { - if (filename.equals("")) { - return new byte[0]; - } - RandomAccessFile file = new RandomAccessFile(new File(filename), "r"); - byte[] content = new byte[(int) file.length()]; - file.readFully(content); - return content; - } } diff --git a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java index b50c6585..94568aea 100755 --- a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java +++ b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java @@ -99,7 +99,6 @@ public class ProtoCaliperBenchmark { private List<ByteArrayInputStream> inputStreamList; private List<ByteString> inputStringList; private List<Message> sampleMessageList; - private long counter; private BenchmarkMessageType getMessageType() throws IOException { if (benchmarkDataset.getMessageName().equals("benchmarks.proto3.GoogleMessage1")) { @@ -149,8 +148,6 @@ public class ProtoCaliperBenchmark { sampleMessageList.add( defaultMessage.newBuilderForType().mergeFrom(singleInputData, extensions).build()); } - - counter = 0; } @@ -160,8 +157,9 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - sampleMessageList.get((int) (counter % sampleMessageList.size())).toByteString(); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + sampleMessageList.get(j).toByteString(); + } } } @@ -171,8 +169,9 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - sampleMessageList.get((int) (counter % sampleMessageList.size())).toByteArray(); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + sampleMessageList.get(j).toByteArray(); + } } } @@ -182,9 +181,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - ByteArrayOutputStream output = new ByteArrayOutputStream(); - sampleMessageList.get((int) (counter % sampleMessageList.size())).writeTo(output); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + sampleMessageList.get(j).writeTo(output); + } } } @@ -194,9 +194,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputStringList.get((int) (counter % inputStringList.size())), extensions); - counter++; + for (int j = 0; j < inputStringList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputStringList.get(j), extensions); + } } } @@ -206,9 +207,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - 
benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputDataList.get((int) (counter % inputDataList.size())), extensions); - counter++; + for (int j = 0; j < inputDataList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputDataList.get(j), extensions); + } } } @@ -218,27 +220,11 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputStreamList.get((int) (counter % inputStreamList.size())), extensions); - inputStreamList.get((int) (counter % inputStreamList.size())).reset(); - counter++; - } - } - - @AfterExperiment - void checkCounter() throws IOException { - if (counter == 1) { - // Dry run - return; - } - if (benchmarkDataset.getPayloadCount() != 1 - && counter < benchmarkDataset.getPayloadCount() * 10L) { - BufferedWriter writer = new BufferedWriter(new FileWriter("JavaBenchmarkWarning.txt", true)); - // If the total number of non-warmup reps is smaller than 100 times of the total number of - // datasets, then output the scale that need to multiply to the configuration (either extend - // the running time for one timingInterval or run for more measurements). - writer.append(1.0 * benchmarkDataset.getPayloadCount() * 10L / counter + " "); - writer.close(); + for (int j = 0; j < inputStreamList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputStreamList.get(j), extensions); + inputStreamList.get(j).reset(); + } } } } diff --git a/benchmarks/py_benchmark.py b/benchmarks/python/py_benchmark.py index ba7a3470..a551ba6d 100755 --- a/benchmarks/py_benchmark.py +++ b/benchmarks/python/py_benchmark.py @@ -2,21 +2,35 @@ import sys import os import timeit import math +import argparse import fnmatch - +import json + +parser = argparse.ArgumentParser(description="Python protobuf benchmark") +parser.add_argument("data_files", metavar="dataFile", nargs="+", + help="testing data files.") +parser.add_argument("--json", action="store_const", dest="json", + const="yes", default="no", + help="Whether to output json results") +parser.add_argument("--behavior_prefix", dest="behavior_prefix", + help="The output json format's behavior's name's prefix", + default="") +# BEGIN CPP GENERATED MESSAGE +parser.add_argument("--cpp_generated", action="store_const", + dest="cpp_generated", const="yes", default="no", + help="Whether to link generated code library") +# END CPP GENERATED MESSAGE +args = parser.parse_args() # BEGIN CPP GENERATED MESSAGE # CPP generated code must be linked before importing the generated Python code # for the descriptor can be found in the pool -if len(sys.argv) < 2: - raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code") -if sys.argv[1] == "true": +if args.cpp_generated != "no": sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" ) import libbenchmark_messages sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" ) -elif sys.argv[1] != "false": - raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code") # END CPP GENERATED MESSAGE + import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2 import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2 import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2 
@@ -26,19 +40,24 @@ import benchmarks_pb2 as benchmarks_pb2 def run_one_test(filename): - data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read() + data = open(filename).read() benchmark_dataset = benchmarks_pb2.BenchmarkDataset() benchmark_dataset.ParseFromString(data) benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload), module="py_benchmark", setup_method="init") - print "Message %s of dataset file %s" % \ - (benchmark_dataset.message_name, filename) + result={} + result["filename"] = filename + result["message_name"] = benchmark_dataset.message_name + result["benchmarks"] = {} benchmark_util.set_test_method("parse_from_benchmark") - print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) + result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"] = \ + benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) benchmark_util.set_test_method("serialize_to_benchmark") - print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) - print "" + result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"] = \ + benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) + return result + def init(filename): global benchmark_dataset, message_class, message_list, counter @@ -66,11 +85,13 @@ def init(filename): temp.ParseFromString(one_payload) message_list.append(temp) + def parse_from_benchmark(): global counter, message_class, benchmark_dataset m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)]) counter = counter + 1 + def serialize_to_benchmark(): global counter, message_list, message_class s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString() @@ -108,11 +129,22 @@ class Benchmark: t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), setup=self.full_setup_code(setup_method_args), number=reps); - return "Average time for %s: %.2f ns" % \ - (self.test_method, 1.0 * t / reps * (10 ** 9)) - + return 1.0 * t / reps * (10 ** 9) + if __name__ == "__main__": - for i in range(2, len(sys.argv)): - run_one_test(sys.argv[i]) - + results = [] + for file in args.data_files: + results.append(run_one_test(file)) + + if args.json != "no": + print json.dumps(results) + else: + for result in results: + print "Message %s of dataset file %s" % \ + (result["message_name"], result["filename"]) + print "Average time for parse_from_benchmark: %.2f ns" % \ + (result["benchmarks"]["parse_from_benchmark"]) + print "Average time for serialize_to_benchmark: %.2f ns" % \ + (result["benchmarks"]["serialize_to_benchmark"]) + print "" diff --git a/benchmarks/python_benchmark_messages.cc b/benchmarks/python/python_benchmark_messages.cc index 55242a2a..ded16fe9 100644 --- a/benchmarks/python_benchmark_messages.cc +++ b/benchmarks/python/python_benchmark_messages.cc @@ -1,8 +1,8 @@ #include <Python.h> #include "benchmarks.pb.h" -#include "datasets/google_message1/benchmark_message1_proto2.pb.h" -#include "datasets/google_message1/benchmark_message1_proto3.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" #include "datasets/google_message2/benchmark_message2.pb.h" #include "datasets/google_message3/benchmark_message3.pb.h" #include "datasets/google_message4/benchmark_message4.pb.h" diff --git a/benchmarks/util/__init__.py b/benchmarks/util/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ 
b/benchmarks/util/__init__.py diff --git a/benchmarks/util/big_query_utils.py b/benchmarks/util/big_query_utils.py new file mode 100755 index 00000000..14105aa6 --- /dev/null +++ b/benchmarks/util/big_query_utils.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python2.7 + +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +# 30 days in milliseconds +_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000 +NUM_RETRIES = 3 + + +def create_big_query(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build( + 'bigquery', 'v2', credentials=creds, cache_discovery=False) + + +def create_dataset(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + + try: + dataset_req = biq_query.datasets().insert( + projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, + http_error) + is_success = False + return is_success + + +def create_table(big_query, project_id, dataset_id, table_id, table_schema, + description): + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description) + + +def create_partitioned_table(big_query, + project_id, + dataset_id, + table_id, + table_schema, + description, + partition_type='DAY', + expiration_ms=_EXPIRATION_MS): + """Creates a partitioned table. By default, a date-paritioned table is created with + each partition lasting 30 days after it was last modified. + """ + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description, partition_type, expiration_ms) + + +def create_table2(big_query, + project_id, + dataset_id, + table_id, + fields_schema, + description, + partition_type=None, + expiration_ms=None): + is_success = True + + body = { + 'description': description, + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + if partition_type and expiration_ms: + body["timePartitioning"] = { + "type": partition_type, + "expirationMs": expiration_ms + } + + try: + table_req = big_query.tables().insert( + projectId=project_id, datasetId=dataset_id, body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. 
Err: %s' % (table_id, + http_error) + is_success = False + return is_success + + +def patch_table(big_query, project_id, dataset_id, table_id, fields_schema): + is_success = True + + body = { + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + try: + table_req = big_query.tables().patch( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully patched %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + print 'Error in creating table: %s. Err: %s' % (table_id, http_error) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = insert_req.execute(num_retries=NUM_RETRIES) + if res.get('insertErrors', None): + print 'Error inserting rows! Response: %s' % res + is_success = False + except HttpError as http_error: + print 'Error inserting rows to the table %s' % table_id + is_success = False + + return is_success + + +def sync_query_job(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + + + # List of (column name, column type, description) tuples +def make_row(unique_row_id, row_values_dict): + """row_values_dict is a dictionary of column name and column value. + """ + return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py new file mode 100755 index 00000000..ae22a668 --- /dev/null +++ b/benchmarks/util/run_and_upload.py @@ -0,0 +1,290 @@ +import argparse +import os +import re +import copy +import uuid +import calendar +import time +import big_query_utils +import datetime +import json +# This import depends on the automake rule protoc_middleman, please make sure +# protoc_middleman has been built before run this file. 
+import os.path, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +import tmp.benchmarks_pb2 as benchmarks_pb2 +from click.types import STRING + +_PROJECT_ID = 'grpc-testing' +_DATASET = 'protobuf_benchmark_result' +_TABLE = 'opensource_result_v1' +_NOW = "%d%02d%02d" % (datetime.datetime.now().year, + datetime.datetime.now().month, + datetime.datetime.now().day) + +file_size_map = {} + +def get_data_size(file_name): + if file_name in file_size_map: + return file_size_map[file_name] + benchmark_dataset = benchmarks_pb2.BenchmarkDataset() + benchmark_dataset.ParseFromString( + open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read()) + size = 0 + count = 0 + for payload in benchmark_dataset.payload: + size += len(payload) + count += 1 + file_size_map[file_name] = (size, 1.0 * size / count) + return size, 1.0 * size / count + + +def extract_file_name(file_name): + name_list = re.split("[/\.]", file_name) + short_file_name = "" + for name in name_list: + if name[:14] == "google_message": + short_file_name = name + return short_file_name + + +cpp_result = [] +python_result = [] +java_result = [] +go_result = [] + + +# CPP results example: +# [ +# "benchmarks": [ +# { +# "bytes_per_second": int, +# "cpu_time": int, +# "name: string, +# "time_unit: string, +# ... +# }, +# ... +# ], +# ... +# ] +def parse_cpp_result(filename): + global cpp_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results = json.loads(f.read()) + for benchmark in results["benchmarks"]: + data_filename = "".join( + re.split("(_parse_|_serialize)", benchmark["name"])[0]) + behavior = benchmark["name"][len(data_filename) + 1:] + cpp_result.append({ + "language": "cpp", + "dataFileName": data_filename, + "behavior": behavior, + "throughput": benchmark["bytes_per_second"] / 2.0 ** 20 + }) + + +# Python results example: +# [ +# [ +# { +# "filename": string, +# "benchmarks": { +# behavior: results, +# ... +# }, +# "message_name": STRING +# }, +# ... +# ], #pure-python +# ... +# ] +def parse_python_result(filename): + global python_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results_list = json.loads(f.read()) + for results in results_list: + for result in results: + _, avg_size = get_data_size(result["filename"]) + for behavior in result["benchmarks"]: + python_result.append({ + "language": "python", + "dataFileName": extract_file_name(result["filename"]), + "behavior": behavior, + "throughput": avg_size / + result["benchmarks"][behavior] * 1e9 / 2 ** 20 + }) + + +# Java results example: +# [ +# { +# "id": string, +# "instrumentSpec": {...}, +# "measurements": [ +# { +# "weight": float, +# "value": { +# "magnitude": float, +# "unit": string +# }, +# ... +# }, +# ... +# ], +# "run": {...}, +# "scenario": { +# "benchmarkSpec": { +# "methodName": string, +# "parameters": { +# defined parameters in the benchmark: parameters value +# }, +# ... +# }, +# ... +# } +# +# }, +# ... 
+# ] +def parse_java_result(filename): + global average_bytes_per_message, java_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results = json.loads(f.read()) + for result in results: + total_weight = 0 + total_value = 0 + for measurement in result["measurements"]: + total_weight += measurement["weight"] + total_value += measurement["value"]["magnitude"] + avg_time = total_value * 1.0 / total_weight + total_size, _ = get_data_size( + result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) + java_result.append({ + "language": "java", + "throughput": total_size / avg_time * 1e9 / 2 ** 20, + "behavior": result["scenario"]["benchmarkSpec"]["methodName"], + "dataFileName": extract_file_name( + result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) + }) + + +# Go benchmark results: +# +# goos: linux +# goarch: amd64 +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op +# PASS +# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s +def parse_go_result(filename): + global go_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + for line in f: + result_list = re.split("[\ \t]+", line) + if result_list[0][:9] != "Benchmark": + continue + first_slash_index = result_list[0].find('/') + last_slash_index = result_list[0].rfind('/') + full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix + total_bytes, _ = get_data_size(full_filename) + behavior_with_suffix = result_list[0][last_slash_index+1:] + last_dash = behavior_with_suffix.rfind("-") + if last_dash == -1: + behavior = behavior_with_suffix + else: + behavior = behavior_with_suffix[:last_dash] + go_result.append({ + "dataFilename": extract_file_name(full_filename), + "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20, + "behavior": behavior, + "language": "go" + }) + + +def get_metadata(): + build_number = os.getenv('BUILD_NUMBER') + build_url = os.getenv('BUILD_URL') + job_name = os.getenv('JOB_NAME') + git_commit = os.getenv('GIT_COMMIT') + # actual commit is the actual head of PR that is getting tested + git_actual_commit = os.getenv('ghprbActualCommit') + + utc_timestamp = str(calendar.timegm(time.gmtime())) + metadata = {'created': utc_timestamp} + + if build_number: + metadata['buildNumber'] = build_number + if build_url: + metadata['buildUrl'] = build_url + if job_name: + metadata['jobName'] = job_name + if git_commit: + metadata['gitCommit'] = git_commit + if git_actual_commit: + metadata['gitActualCommit'] = git_actual_commit + + return metadata + + +def upload_result(result_list, metadata): + for result in result_list: + new_result = copy.deepcopy(result) + new_result['metadata'] = metadata + bq = big_query_utils.create_big_query() + row = big_query_utils.make_row(str(uuid.uuid4()), new_result) + if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET, + _TABLE + "$" + _NOW, + [row]): + print 'Error 
when uploading result', new_result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-cpp", "--cpp_input_file", + help="The CPP benchmark result file's name", + default="") + parser.add_argument("-java", "--java_input_file", + help="The Java benchmark result file's name", + default="") + parser.add_argument("-python", "--python_input_file", + help="The Python benchmark result file's name", + default="") + parser.add_argument("-go", "--go_input_file", + help="The golang benchmark result file's name", + default="") + args = parser.parse_args() + + parse_cpp_result(args.cpp_input_file) + parse_python_result(args.python_input_file) + parse_java_result(args.java_input_file) + parse_go_result(args.go_input_file) + + metadata = get_metadata() + print "uploading cpp results..." + upload_result(cpp_result, metadata) + print "uploading java results..." + upload_result(java_result, metadata) + print "uploading python results..." + upload_result(python_result, metadata) + print "uploading go results..." + upload_result(go_result, metadata) |
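As a usage note on the Go parser above: `go test -bench` prints one line per benchmark with the iteration count and ns/op, and `parse_go_result()` recovers the dataset path and the behavior from the benchmark name before applying the same throughput formula. Below is a simplified sketch of that per-line parsing; the sample line is the one quoted in the script's own comment block, the path/behavior extraction is a streamlined version of the script's index arithmetic, and the payload size is illustrative.

```python
import re

# Sample line from the comment block in run_and_upload.py.
line = ("Benchmark/.././datasets/google_message2/dataset.google_message2.pb/"
        "Unmarshal-12   3000   705784 ns/op")

name, _iterations, ns_per_op = re.split(r"[ \t]+", line.strip())[:3]

# Everything between the first and last '/' is the data file path (with a
# leading "../" that the script strips); the trailing component is the
# behavior plus a "-<GOMAXPROCS>" suffix.
data_file = name[name.find('/') + 1:name.rfind('/')].lstrip("./")
behavior = name[name.rfind('/') + 1:].rsplit("-", 1)[0]

total_payload_bytes = 84570.0  # illustrative; get_data_size() sums the payloads
throughput_mib_s = total_payload_bytes / float(ns_per_op) * 1e9 / 2 ** 20

print(data_file)   # datasets/google_message2/dataset.google_message2.pb
print(behavior)    # Unmarshal
print(round(throughput_mib_s, 2))
```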