From 805174eda2356df1b01752c8bc57019e696e0a75 Mon Sep 17 00:00:00 2001 From: Yilun Chong Date: Tue, 10 Apr 2018 13:26:17 -0700 Subject: Add script for run and upload the benchmark result to bq --- benchmarks/Makefile.am | 114 ++++---- benchmarks/README.md | 16 +- benchmarks/cpp/cpp_benchmark.cc | 254 ++++++++++++++++++ benchmarks/cpp_benchmark.cc | 254 ------------------ benchmarks/go/go_benchmark_test.go | 124 +++++++++ benchmarks/go_benchmark_test.go | 124 --------- .../main/java/com/google/protobuf/ProtoBench.java | 98 ++----- .../com/google/protobuf/ProtoCaliperBenchmark.java | 60 ++--- benchmarks/py_benchmark.py | 118 --------- benchmarks/python/py_benchmark.py | 150 +++++++++++ benchmarks/python/python_benchmark_messages.cc | 29 +++ benchmarks/python_benchmark_messages.cc | 29 --- benchmarks/util/__init__.py | 0 benchmarks/util/big_query_utils.py | 188 +++++++++++++ benchmarks/util/run_and_upload.py | 290 +++++++++++++++++++++ 15 files changed, 1143 insertions(+), 705 deletions(-) create mode 100644 benchmarks/cpp/cpp_benchmark.cc delete mode 100644 benchmarks/cpp_benchmark.cc create mode 100644 benchmarks/go/go_benchmark_test.go delete mode 100644 benchmarks/go_benchmark_test.go delete mode 100755 benchmarks/py_benchmark.py create mode 100755 benchmarks/python/py_benchmark.py create mode 100644 benchmarks/python/python_benchmark_messages.cc delete mode 100644 benchmarks/python_benchmark_messages.cc create mode 100644 benchmarks/util/__init__.py create mode 100755 benchmarks/util/big_query_utils.py create mode 100755 benchmarks/util/run_and_upload.py (limited to 'benchmarks') diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 3de0e380..eba256f2 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -28,11 +28,11 @@ make_tmp_dir: if USE_EXTERNAL_PROTOC protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs) - $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=./cpp --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) touch protoc_middleman protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2) - $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs_proto2) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=./cpp --java_out=./tmp $(benchmarks_protoc_inputs_proto2) touch protoc_middleman2 else @@ -41,60 +41,60 @@ else # relative to srcdir, which may not be the same as the current directory when # building out-of-tree. protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) - oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) ) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd/cpp --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) ) touch protoc_middleman protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) - oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. 
-I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) ) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd/cpp --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) ) touch protoc_middleman2 endif -all_data = `find . -type f -name "dataset.*.pb"` +all_data = $$(find . -type f -name "dataset.*.pb" -not -path "./tmp/*") ############# CPP RULES ############## benchmarks_protoc_outputs = \ - benchmarks.pb.cc \ - datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc - + cpp/benchmarks.pb.cc \ + cpp/datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc + benchmarks_protoc_outputs_header = \ - benchmarks.pb.h \ - datasets/google_message1/proto3/benchmark_message1_proto3.pb.h + cpp/benchmarks.pb.h \ + cpp/datasets/google_message1/proto3/benchmark_message1_proto3.pb.h benchmarks_protoc_outputs_proto2_header = \ - datasets/google_message1/proto2/benchmark_message1_proto2.pb.h \ - datasets/google_message2/benchmark_message2.pb.h \ - datasets/google_message3/benchmark_message3.pb.h \ - datasets/google_message3/benchmark_message3_1.pb.h \ - datasets/google_message3/benchmark_message3_2.pb.h \ - datasets/google_message3/benchmark_message3_3.pb.h \ - datasets/google_message3/benchmark_message3_4.pb.h \ - datasets/google_message3/benchmark_message3_5.pb.h \ - datasets/google_message3/benchmark_message3_6.pb.h \ - datasets/google_message3/benchmark_message3_7.pb.h \ - datasets/google_message3/benchmark_message3_8.pb.h \ - datasets/google_message4/benchmark_message4.pb.h \ - datasets/google_message4/benchmark_message4_1.pb.h \ - datasets/google_message4/benchmark_message4_2.pb.h \ - datasets/google_message4/benchmark_message4_3.pb.h + cpp/datasets/google_message1/proto2/benchmark_message1_proto2.pb.h \ + cpp/datasets/google_message2/benchmark_message2.pb.h \ + cpp/datasets/google_message3/benchmark_message3.pb.h \ + cpp/datasets/google_message3/benchmark_message3_1.pb.h \ + cpp/datasets/google_message3/benchmark_message3_2.pb.h \ + cpp/datasets/google_message3/benchmark_message3_3.pb.h \ + cpp/datasets/google_message3/benchmark_message3_4.pb.h \ + cpp/datasets/google_message3/benchmark_message3_5.pb.h \ + cpp/datasets/google_message3/benchmark_message3_6.pb.h \ + cpp/datasets/google_message3/benchmark_message3_7.pb.h \ + cpp/datasets/google_message3/benchmark_message3_8.pb.h \ + cpp/datasets/google_message4/benchmark_message4.pb.h \ + cpp/datasets/google_message4/benchmark_message4_1.pb.h \ + cpp/datasets/google_message4/benchmark_message4_2.pb.h \ + cpp/datasets/google_message4/benchmark_message4_3.pb.h benchmarks_protoc_outputs_proto2 = \ - datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc \ - datasets/google_message2/benchmark_message2.pb.cc \ - datasets/google_message3/benchmark_message3.pb.cc \ - datasets/google_message3/benchmark_message3_1.pb.cc \ - datasets/google_message3/benchmark_message3_2.pb.cc \ - datasets/google_message3/benchmark_message3_3.pb.cc \ - datasets/google_message3/benchmark_message3_4.pb.cc \ - datasets/google_message3/benchmark_message3_5.pb.cc \ - datasets/google_message3/benchmark_message3_6.pb.cc \ - datasets/google_message3/benchmark_message3_7.pb.cc \ - datasets/google_message3/benchmark_message3_8.pb.cc \ - datasets/google_message4/benchmark_message4.pb.cc \ - datasets/google_message4/benchmark_message4_1.pb.cc \ - 
datasets/google_message4/benchmark_message4_2.pb.cc \ - datasets/google_message4/benchmark_message4_3.pb.cc + cpp/datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc \ + cpp/datasets/google_message2/benchmark_message2.pb.cc \ + cpp/datasets/google_message3/benchmark_message3.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_1.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_2.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_3.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_4.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_5.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_6.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_7.pb.cc \ + cpp/datasets/google_message3/benchmark_message3_8.pb.cc \ + cpp/datasets/google_message4/benchmark_message4.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_1.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_2.pb.cc \ + cpp/datasets/google_message4/benchmark_message4_3.pb.cc $(benchmarks_protoc_outputs): protoc_middleman @@ -118,12 +118,12 @@ AM_CXXFLAGS = $(NO_OPT_CXXFLAGS) $(PROTOBUF_OPT_FLAG) -Wall -Wwrite-strings -Wov bin_PROGRAMS = cpp-benchmark cpp_benchmark_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a -cpp_benchmark_SOURCES = cpp_benchmark.cc -cpp_benchmark_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) -I$(top_srcdir)/third_party/benchmark/include +cpp_benchmark_SOURCES = cpp/cpp_benchmark.cc +cpp_benchmark_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(top_srcdir)/third_party/benchmark/include # Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough. # See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually -cpp_benchmark-cpp_benchmark.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a +cpp/cpp_benchmark-cpp_benchmark.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/third_party/benchmark/src/libbenchmark.a nodist_cpp_benchmark_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ @@ -174,11 +174,11 @@ python_add_init: protoc_middleman protoc_middleman2 python_cpp_pkg_flags = `pkg-config --cflags --libs python` lib_LTLIBRARIES = libbenchmark_messages.la -libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc +libbenchmark_messages_la_SOURCES = python/python_benchmark_messages.cc libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic -libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags) -libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) +libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp $(python_cpp_pkg_flags) +libbenchmark_messages_la-python_benchmark_messages.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) 
$(benchmarks_protoc_outputs_proto2) nodist_libbenchmark_messages_la_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ @@ -191,8 +191,8 @@ python-pure-python-benchmark: python_add_init @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark - @echo cp py_benchmark.py tmp >> python-pure-python-benchmark - @echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark + @echo cp python/py_benchmark.py tmp >> python-pure-python-benchmark + @echo python tmp/py_benchmark.py '$$@' >> python-pure-python-benchmark @chmod +x python-pure-python-benchmark python-cpp-reflection-benchmark: python_add_init @@ -201,8 +201,8 @@ python-cpp-reflection-benchmark: python_add_init @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark - @echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark - @echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark + @echo cp python/py_benchmark.py tmp >> python-cpp-reflection-benchmark + @echo python tmp/py_benchmark.py '$$@' >> python-cpp-reflection-benchmark @chmod +x python-cpp-reflection-benchmark python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la @@ -211,8 +211,8 @@ python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la @echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark @echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark @echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark - @echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark - @echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark + @echo cp python/py_benchmark.py tmp >> python-cpp-generated-code-benchmark + @echo python tmp/py_benchmark.py --cpp_generated '$$@' >> python-cpp-generated-code-benchmark @chmod +x python-cpp-generated-code-benchmark python-pure-python: python-pure-python-benchmark @@ -287,9 +287,13 @@ endif go-benchmark: go_protoc_middleman go_protoc_middleman2 @echo "Writing shortcut script go-benchmark..." @echo '#! /bin/sh' > go-benchmark - @echo 'mkdir tmp_cc && mv *.cc tmp_cc' >> go-benchmark - @echo 'go test -bench=. -- $$@' >> go-benchmark - @echo 'mv tmp_cc/* . && rm -rf tmp_cc' >> go-benchmark + @echo 'cd go' >> go-benchmark + @echo 'all_data=""' >> go-benchmark + @echo 'conf=()' >> go-benchmark + @echo 'data_files=()' >> go-benchmark + @echo 'for arg in $$@; do if [[ $${arg:0:1} == "-" ]]; then conf+=($$arg); else data_files+=("../$$arg"); fi; done' >> go-benchmark + @echo 'go test -bench=. $${conf[*]} -- $${data_files[*]}' >> go-benchmark + @echo 'cd ..' 
>> go-benchmark @chmod +x go-benchmark go: go_protoc_middleman go_protoc_middleman2 go-benchmark diff --git a/benchmarks/README.md b/benchmarks/README.md index 74c042d8..8460a18a 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -107,49 +107,51 @@ $ make python-cpp-generated-code $ make go ``` -To run a specific dataset: +To run a specific dataset or run with specific options: ### Java: ``` $ make java-benchmark -$ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)] +$ ./java-benchmark $(specific generated dataset file name) [$(caliper options)] ``` ### CPP: ``` $ make cpp-benchmark -$ ./cpp-benchmark $(specific generated dataset file name) +$ ./cpp-benchmark $(specific generated dataset file name) [$(benchmark options)] ``` ### Python: +For Python benchmark we have `--json` for outputing the json result + #### Pure Python: ``` $ make python-pure-python-benchmark -$ ./python-pure-python-benchmark $(specific generated dataset file name) +$ ./python-pure-python-benchmark [--json] $(specific generated dataset file name) ``` #### CPP reflection: ``` $ make python-cpp-reflection-benchmark -$ ./python-cpp-reflection-benchmark $(specific generated dataset file name) +$ ./python-cpp-reflection-benchmark [--json] $(specific generated dataset file name) ``` #### CPP generated code: ``` $ make python-cpp-generated-code-benchmark -$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name) +$ ./python-cpp-generated-code-benchmark [--json] $(specific generated dataset file name) ``` ### Go: ``` $ make go-benchmark -$ ./go-benchmark $(specific generated dataset file name) +$ ./go-benchmark $(specific generated dataset file name) [go testing options] ``` diff --git a/benchmarks/cpp/cpp_benchmark.cc b/benchmarks/cpp/cpp_benchmark.cc new file mode 100644 index 00000000..f8b55291 --- /dev/null +++ b/benchmarks/cpp/cpp_benchmark.cc @@ -0,0 +1,254 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include "benchmark/benchmark_api.h" +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" + + +#define PREFIX "dataset." +#define SUFFIX ".pb" + +using benchmarks::BenchmarkDataset; +using google::protobuf::Arena; +using google::protobuf::Descriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::Message; +using google::protobuf::MessageFactory; + +class Fixture : public benchmark::Fixture { + public: + Fixture(const BenchmarkDataset& dataset, const std::string& suffix) { + for (int i = 0; i < dataset.payload_size(); i++) { + payloads_.push_back(dataset.payload(i)); + } + + const Descriptor* d = + DescriptorPool::generated_pool()->FindMessageTypeByName( + dataset.message_name()); + + if (!d) { + std::cerr << "Couldn't find message named '" << dataset.message_name() + << "\n"; + } + + prototype_ = MessageFactory::generated_factory()->GetPrototype(d); + SetName((dataset.name() + suffix).c_str()); + } + + protected: + std::vector payloads_; + const Message* prototype_; +}; + +class WrappingCounter { + public: + WrappingCounter(size_t limit) : value_(0), limit_(limit) {} + + size_t Next() { + size_t ret = value_; + if (++value_ == limit_) { + value_ = 0; + } + return ret; + } + + private: + size_t value_; + size_t limit_; +}; + +template +class ParseNewFixture : public Fixture { + public: + ParseNewFixture(const BenchmarkDataset& dataset) + : Fixture(dataset, "_parse_new") {} + + virtual void BenchmarkCase(benchmark::State& state) { + WrappingCounter i(payloads_.size()); + size_t total = 0; + + while (state.KeepRunning()) { + T m; + const std::string& payload = payloads_[i.Next()]; + total += payload.size(); + m.ParseFromString(payload); + } + + state.SetBytesProcessed(total); + } +}; + +template +class ParseNewArenaFixture : public Fixture { + public: + ParseNewArenaFixture(const BenchmarkDataset& dataset) + : Fixture(dataset, "_parse_newarena") {} + + virtual void BenchmarkCase(benchmark::State& state) { + WrappingCounter i(payloads_.size()); + size_t total = 0; + Arena arena; + + while (state.KeepRunning()) { + arena.Reset(); + Message* m = Arena::CreateMessage(&arena); + const std::string& payload = payloads_[i.Next()]; + total += payload.size(); + m->ParseFromString(payload); + } + + state.SetBytesProcessed(total); + } +}; + +template +class ParseReuseFixture : public Fixture { + public: + ParseReuseFixture(const BenchmarkDataset& dataset) + : Fixture(dataset, "_parse_reuse") {} + + virtual void BenchmarkCase(benchmark::State& state) { + T m; + WrappingCounter i(payloads_.size()); + size_t total = 0; + + while (state.KeepRunning()) { + const std::string& payload = payloads_[i.Next()]; + total += payload.size(); + 
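+      // Unlike the _parse_new fixture above, the message object here is
+      // reused across iterations, isolating parse cost from allocation.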
m.ParseFromString(payload); + } + + state.SetBytesProcessed(total); + } +}; + +template +class SerializeFixture : public Fixture { + public: + SerializeFixture(const BenchmarkDataset& dataset) + : Fixture(dataset, "_serialize") { + for (size_t i = 0; i < payloads_.size(); i++) { + message_.push_back(new T); + message_.back()->ParseFromString(payloads_[i]); + } + } + + ~SerializeFixture() { + for (size_t i = 0; i < message_.size(); i++) { + delete message_[i]; + } + } + + virtual void BenchmarkCase(benchmark::State& state) { + size_t total = 0; + std::string str; + WrappingCounter i(payloads_.size()); + + while (state.KeepRunning()) { + str.clear(); + message_[i.Next()]->SerializeToString(&str); + total += str.size(); + } + + state.SetBytesProcessed(total); + } + + private: + std::vector message_; +}; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << + "', please make sure you are running " + "this command from the benchmarks/ " + "directory.\n"; + return std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} + +template +void RegisterBenchmarksForType(const BenchmarkDataset& dataset) { + ::benchmark::internal::RegisterBenchmarkInternal( + new ParseNewFixture(dataset)); + ::benchmark::internal::RegisterBenchmarkInternal( + new ParseReuseFixture(dataset)); + ::benchmark::internal::RegisterBenchmarkInternal( + new ParseNewArenaFixture(dataset)); + ::benchmark::internal::RegisterBenchmarkInternal( + new SerializeFixture(dataset)); +} + +void RegisterBenchmarks(const std::string& dataset_bytes) { + BenchmarkDataset dataset; + GOOGLE_CHECK(dataset.ParseFromString(dataset_bytes)); + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + RegisterBenchmarksForType(dataset); + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + RegisterBenchmarksForType(dataset); + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + RegisterBenchmarksForType(dataset); + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + RegisterBenchmarksForType + (dataset); + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + RegisterBenchmarksForType + (dataset); + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } +} + +int main(int argc, char *argv[]) { + ::benchmark::Initialize(&argc, argv); + if (argc == 1) { + std::cerr << "Usage: ./cpp-benchmark " << std::endl; + std::cerr << "input data is in the format of \"benchmarks.proto\"" + << std::endl; + return 1; + } else { + for (int i = 1; i < argc; i++) { + RegisterBenchmarks(ReadFile(argv[i])); + } + } + + ::benchmark::RunSpecifiedBenchmarks(); +} diff --git a/benchmarks/cpp_benchmark.cc b/benchmarks/cpp_benchmark.cc deleted file mode 100644 index f8b55291..00000000 --- a/benchmarks/cpp_benchmark.cc +++ /dev/null @@ -1,254 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include "benchmark/benchmark_api.h" -#include "benchmarks.pb.h" -#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" -#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" -#include "datasets/google_message2/benchmark_message2.pb.h" -#include "datasets/google_message3/benchmark_message3.pb.h" -#include "datasets/google_message4/benchmark_message4.pb.h" - - -#define PREFIX "dataset." -#define SUFFIX ".pb" - -using benchmarks::BenchmarkDataset; -using google::protobuf::Arena; -using google::protobuf::Descriptor; -using google::protobuf::DescriptorPool; -using google::protobuf::Message; -using google::protobuf::MessageFactory; - -class Fixture : public benchmark::Fixture { - public: - Fixture(const BenchmarkDataset& dataset, const std::string& suffix) { - for (int i = 0; i < dataset.payload_size(); i++) { - payloads_.push_back(dataset.payload(i)); - } - - const Descriptor* d = - DescriptorPool::generated_pool()->FindMessageTypeByName( - dataset.message_name()); - - if (!d) { - std::cerr << "Couldn't find message named '" << dataset.message_name() - << "\n"; - } - - prototype_ = MessageFactory::generated_factory()->GetPrototype(d); - SetName((dataset.name() + suffix).c_str()); - } - - protected: - std::vector payloads_; - const Message* prototype_; -}; - -class WrappingCounter { - public: - WrappingCounter(size_t limit) : value_(0), limit_(limit) {} - - size_t Next() { - size_t ret = value_; - if (++value_ == limit_) { - value_ = 0; - } - return ret; - } - - private: - size_t value_; - size_t limit_; -}; - -template -class ParseNewFixture : public Fixture { - public: - ParseNewFixture(const BenchmarkDataset& dataset) - : Fixture(dataset, "_parse_new") {} - - virtual void BenchmarkCase(benchmark::State& state) { - WrappingCounter i(payloads_.size()); - size_t total = 0; - - while (state.KeepRunning()) { - T m; - const std::string& payload = payloads_[i.Next()]; - total += payload.size(); - m.ParseFromString(payload); - } - - state.SetBytesProcessed(total); - } -}; - -template -class ParseNewArenaFixture : public Fixture { - public: - ParseNewArenaFixture(const BenchmarkDataset& dataset) - : Fixture(dataset, "_parse_newarena") {} - - virtual void BenchmarkCase(benchmark::State& state) { - WrappingCounter i(payloads_.size()); - size_t 
total = 0; - Arena arena; - - while (state.KeepRunning()) { - arena.Reset(); - Message* m = Arena::CreateMessage(&arena); - const std::string& payload = payloads_[i.Next()]; - total += payload.size(); - m->ParseFromString(payload); - } - - state.SetBytesProcessed(total); - } -}; - -template -class ParseReuseFixture : public Fixture { - public: - ParseReuseFixture(const BenchmarkDataset& dataset) - : Fixture(dataset, "_parse_reuse") {} - - virtual void BenchmarkCase(benchmark::State& state) { - T m; - WrappingCounter i(payloads_.size()); - size_t total = 0; - - while (state.KeepRunning()) { - const std::string& payload = payloads_[i.Next()]; - total += payload.size(); - m.ParseFromString(payload); - } - - state.SetBytesProcessed(total); - } -}; - -template -class SerializeFixture : public Fixture { - public: - SerializeFixture(const BenchmarkDataset& dataset) - : Fixture(dataset, "_serialize") { - for (size_t i = 0; i < payloads_.size(); i++) { - message_.push_back(new T); - message_.back()->ParseFromString(payloads_[i]); - } - } - - ~SerializeFixture() { - for (size_t i = 0; i < message_.size(); i++) { - delete message_[i]; - } - } - - virtual void BenchmarkCase(benchmark::State& state) { - size_t total = 0; - std::string str; - WrappingCounter i(payloads_.size()); - - while (state.KeepRunning()) { - str.clear(); - message_[i.Next()]->SerializeToString(&str); - total += str.size(); - } - - state.SetBytesProcessed(total); - } - - private: - std::vector message_; -}; - -std::string ReadFile(const std::string& name) { - std::ifstream file(name.c_str()); - GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << - "', please make sure you are running " - "this command from the benchmarks/ " - "directory.\n"; - return std::string((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); -} - -template -void RegisterBenchmarksForType(const BenchmarkDataset& dataset) { - ::benchmark::internal::RegisterBenchmarkInternal( - new ParseNewFixture(dataset)); - ::benchmark::internal::RegisterBenchmarkInternal( - new ParseReuseFixture(dataset)); - ::benchmark::internal::RegisterBenchmarkInternal( - new ParseNewArenaFixture(dataset)); - ::benchmark::internal::RegisterBenchmarkInternal( - new SerializeFixture(dataset)); -} - -void RegisterBenchmarks(const std::string& dataset_bytes) { - BenchmarkDataset dataset; - GOOGLE_CHECK(dataset.ParseFromString(dataset_bytes)); - - if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { - RegisterBenchmarksForType(dataset); - } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { - RegisterBenchmarksForType(dataset); - } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { - RegisterBenchmarksForType(dataset); - } else if (dataset.message_name() == - "benchmarks.google_message3.GoogleMessage3") { - RegisterBenchmarksForType - (dataset); - } else if (dataset.message_name() == - "benchmarks.google_message4.GoogleMessage4") { - RegisterBenchmarksForType - (dataset); - } else { - std::cerr << "Unknown message type: " << dataset.message_name(); - exit(1); - } -} - -int main(int argc, char *argv[]) { - ::benchmark::Initialize(&argc, argv); - if (argc == 1) { - std::cerr << "Usage: ./cpp-benchmark " << std::endl; - std::cerr << "input data is in the format of \"benchmarks.proto\"" - << std::endl; - return 1; - } else { - for (int i = 1; i < argc; i++) { - RegisterBenchmarks(ReadFile(argv[i])); - } - } - - ::benchmark::RunSpecifiedBenchmarks(); -} diff --git a/benchmarks/go/go_benchmark_test.go 
b/benchmarks/go/go_benchmark_test.go new file mode 100644 index 00000000..8c741b71 --- /dev/null +++ b/benchmarks/go/go_benchmark_test.go @@ -0,0 +1,124 @@ +package main + +import ( + benchmarkWrapper "../tmp" + googleMessage1Proto2 "../tmp/datasets/google_message1/proto2" + googleMessage1Proto3 "../tmp/datasets/google_message1/proto3" + googleMessage2 "../tmp/datasets/google_message2" + googleMessage3 "../tmp/datasets/google_message3" + googleMessage4 "../tmp/datasets/google_message4" + "flag" + "github.com/golang/protobuf/proto" + "io/ioutil" + "testing" +) + +// Data is returned by the Load function. +type Dataset struct { + name string + newMessage func() proto.Message + marshaled [][]byte + unmarshaled []proto.Message +} + +var datasets []Dataset + +// This is used to getDefaultInstance for a message type. +func generateNewMessageFunction(dataset benchmarkWrapper.BenchmarkDataset) func() proto.Message { + switch dataset.MessageName { + case "benchmarks.proto3.GoogleMessage1": + return func() proto.Message { return new(googleMessage1Proto3.GoogleMessage1) } + case "benchmarks.proto2.GoogleMessage1": + return func() proto.Message { return new(googleMessage1Proto2.GoogleMessage1) } + case "benchmarks.proto2.GoogleMessage2": + return func() proto.Message { return new(googleMessage2.GoogleMessage2) } + case "benchmarks.google_message3.GoogleMessage3": + return func() proto.Message { return new(googleMessage3.GoogleMessage3) } + case "benchmarks.google_message4.GoogleMessage4": + return func() proto.Message { return new(googleMessage4.GoogleMessage4) } + default: + panic("Unknown message type: " + dataset.MessageName) + } +} + +func init() { + flag.Parse() + for _, f := range flag.Args() { + // Load the benchmark. + b, err := ioutil.ReadFile(f) + if err != nil { + panic(err) + } + + // Parse the benchmark. + var dm benchmarkWrapper.BenchmarkDataset + if err := proto.Unmarshal(b, &dm); err != nil { + panic(err) + } + + // Determine the concrete protobuf message type to use. + var ds Dataset + ds.newMessage = generateNewMessageFunction(dm) + + // Unmarshal each test message. 
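+		// Keep both the raw payloads and the pre-parsed messages: the
+		// Unmarshal sub-benchmark below consumes ds.marshaled, while
+		// Marshal, Size, Clone and Merge reuse ds.unmarshaled so that
+		// setup cost is paid once here rather than inside the timed loop.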
+ for _, payload := range dm.Payload { + ds.marshaled = append(ds.marshaled, payload) + m := ds.newMessage() + if err := proto.Unmarshal(payload, m); err != nil { + panic(err) + } + ds.unmarshaled = append(ds.unmarshaled, m) + } + ds.name = f + + datasets = append(datasets, ds) + } +} + +func Benchmark(b *testing.B) { + for _, ds := range datasets { + b.Run(ds.name, func(b *testing.B) { + b.Run("Unmarshal", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for j, payload := range ds.marshaled { + out := ds.newMessage() + if err := proto.Unmarshal(payload, out); err != nil { + b.Fatalf("can't unmarshal message %d %v", j, err) + } + } + } + }) + b.Run("Marshal", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for j, m := range ds.unmarshaled { + if _, err := proto.Marshal(m); err != nil { + b.Fatalf("can't marshal message %d %+v: %v", j, m, err) + } + } + } + }) + b.Run("Size", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, m := range ds.unmarshaled { + proto.Size(m) + } + } + }) + b.Run("Clone", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, m := range ds.unmarshaled { + proto.Clone(m) + } + } + }) + b.Run("Merge", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, m := range ds.unmarshaled { + out := ds.newMessage() + proto.Merge(out, m) + } + } + }) + }) + } +} diff --git a/benchmarks/go_benchmark_test.go b/benchmarks/go_benchmark_test.go deleted file mode 100644 index e747465e..00000000 --- a/benchmarks/go_benchmark_test.go +++ /dev/null @@ -1,124 +0,0 @@ -package main - -import ( - benchmarkWrapper "./tmp" - googleMessage1Proto2 "./tmp/datasets/google_message1/proto2" - googleMessage1Proto3 "./tmp/datasets/google_message1/proto3" - googleMessage2 "./tmp/datasets/google_message2" - googleMessage3 "./tmp/datasets/google_message3" - googleMessage4 "./tmp/datasets/google_message4" - "flag" - "github.com/golang/protobuf/proto" - "io/ioutil" - "testing" -) - -// Data is returned by the Load function. -type Dataset struct { - name string - newMessage func() proto.Message - marshaled [][]byte - unmarshaled []proto.Message -} - -var datasets []Dataset - -// This is used to getDefaultInstance for a message type. -func generateNewMessageFunction(dataset benchmarkWrapper.BenchmarkDataset) func() proto.Message { - switch dataset.MessageName { - case "benchmarks.proto3.GoogleMessage1": - return func() proto.Message { return new(googleMessage1Proto3.GoogleMessage1) } - case "benchmarks.proto2.GoogleMessage1": - return func() proto.Message { return new(googleMessage1Proto2.GoogleMessage1) } - case "benchmarks.proto2.GoogleMessage2": - return func() proto.Message { return new(googleMessage2.GoogleMessage2) } - case "benchmarks.google_message3.GoogleMessage3": - return func() proto.Message { return new(googleMessage3.GoogleMessage3) } - case "benchmarks.google_message4.GoogleMessage4": - return func() proto.Message { return new(googleMessage4.GoogleMessage4) } - default: - panic("Unknown message type: " + dataset.MessageName) - } -} - -func init() { - flag.Parse() - for _, f := range flag.Args() { - // Load the benchmark. - b, err := ioutil.ReadFile(f) - if err != nil { - panic(err) - } - - // Parse the benchmark. - var dm benchmarkWrapper.BenchmarkDataset - if err := proto.Unmarshal(b, &dm); err != nil { - panic(err) - } - - // Determine the concrete protobuf message type to use. - var ds Dataset - ds.newMessage = generateNewMessageFunction(dm) - - // Unmarshal each test message. 
- for _, payload := range dm.Payload { - ds.marshaled = append(ds.marshaled, payload) - m := ds.newMessage() - if err := proto.Unmarshal(payload, m); err != nil { - panic(err) - } - ds.unmarshaled = append(ds.unmarshaled, m) - } - ds.name = f - - datasets = append(datasets, ds) - } -} - -func Benchmark(b *testing.B) { - for _, ds := range datasets { - b.Run(ds.name, func(b *testing.B) { - b.Run("Unmarshal", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for j, payload := range ds.marshaled { - out := ds.newMessage() - if err := proto.Unmarshal(payload, out); err != nil { - b.Fatalf("can't unmarshal message %d %v", j, err) - } - } - } - }) - b.Run("Marshal", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for j, m := range ds.unmarshaled { - if _, err := proto.Marshal(m); err != nil { - b.Fatalf("can't marshal message %d %+v: %v", j, m, err) - } - } - } - }) - b.Run("Size", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, m := range ds.unmarshaled { - proto.Size(m) - } - } - }) - b.Run("Clone", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, m := range ds.unmarshaled { - proto.Clone(m) - } - } - }) - b.Run("Merge", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, m := range ds.unmarshaled { - out := ds.newMessage() - proto.Merge(out, m) - } - } - }) - }) - } -} diff --git a/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java b/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java index f35b180a..02503905 100755 --- a/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java +++ b/benchmarks/java/src/main/java/com/google/protobuf/ProtoBench.java @@ -69,40 +69,18 @@ public class ProtoBench { System.err.println("input data is in the format of \"benchmarks.proto\""); System.exit(1); } - boolean success = true; - for (int i = 0; i < args.length; i++) { - success &= runTest(args[i]); - } - System.exit(success ? 0 : 1); + + System.exit(runTest(args) ? 0 : 1); } - - /** - * Runs a single test with specific test data. Error messages are displayed to stderr, - * and the return value indicates general success/failure. - */ - public static boolean runTest(String file) { - byte[] inputData; - BenchmarkDataset benchmarkDataset; - try { - inputData = readAllBytes(file); - benchmarkDataset = BenchmarkDataset.parseFrom(inputData); - } catch (IOException e) { - System.err.println("Unable to get input data"); - return false; - } - List argsList = getCaliperOption(benchmarkDataset); - if (argsList == null) { - System.err.println("Unable to get default message " + benchmarkDataset.getMessageName()); - return false; - } - argsList.add("-DdataFile=" + file); + public static boolean runTest(String args[]) { + List argsList = getCaliperOption(args); argsList.add("com.google.protobuf.ProtoCaliperBenchmark"); try { - String args[] = new String[argsList.size()]; - argsList.toArray(args); - CaliperMain.exitlessMain(args, + String newArgs[] = new String[argsList.size()]; + argsList.toArray(newArgs); + CaliperMain.exitlessMain(newArgs, new PrintWriter(System.out, true), new PrintWriter(System.err, true)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); @@ -110,54 +88,22 @@ public class ProtoBench { e.printStackTrace(System.err); return false; } - try { - double mininumScale = 0; - // If the file not exist, this will throw IOException, which won't print the warning - // information below. 
- Scanner scanner = new Scanner(new String(readAllBytes("JavaBenchmarkWarning.txt"))); - while (scanner.hasNext()) { - mininumScale = Math.max(mininumScale, scanner.nextDouble()); - } - scanner.close(); - - System.out.println( - "WARNING: This benchmark's whole iterations are not enough, consider to config caliper to " - + "run for more time to make the result more convincing. You may change the configure " - + "code in com.google.protobuf.ProtoBench.getCaliperOption() of benchmark " - + benchmarkDataset.getMessageName() - + " to run for more time. e.g. Change the value of " - + "instrument.runtime.options.timingInterval or value of " - + "instrument.runtime.options.measurements to be at least " - + Math.round(mininumScale * 10 + 1) / 10.0 - + " times of before, then build and run the benchmark again\n"); - Files.deleteIfExists(Paths.get("JavaBenchmarkWarning.txt")); - } catch (IOException e) { - // The IOException here should be file not found, which means there's no warning generated by - // The benchmark, so this IOException should be discarded. - } return true; } - - private static List getCaliperOption(final BenchmarkDataset benchmarkDataset) { + private static List getCaliperOption(String args[]) { List temp = new ArrayList(); - if (benchmarkDataset.getMessageName().equals("benchmarks.proto3.GoogleMessage1")) { - } else if (benchmarkDataset.getMessageName().equals("benchmarks.proto2.GoogleMessage1")) { - } else if (benchmarkDataset.getMessageName().equals("benchmarks.proto2.GoogleMessage2")) { - } else if (benchmarkDataset.getMessageName(). - equals("benchmarks.google_message3.GoogleMessage3")) { - temp.add("-Cinstrument.runtime.options.timingInterval=3000ms"); - temp.add("-Cinstrument.runtime.options.measurements=20"); - } else if (benchmarkDataset.getMessageName(). - equals("benchmarks.google_message4.GoogleMessage4")) { - temp.add("-Cinstrument.runtime.options.timingInterval=1500ms"); - temp.add("-Cinstrument.runtime.options.measurements=20"); - } else { - return null; - } - temp.add("-i"); temp.add("runtime"); + String files = ""; + for (int i = 0; i < args.length; i++) { + if (args[i].charAt(0) == '-') { + temp.add(args[i]); + } else { + files += (files.equals("") ? 
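+          // Caliper accepts a single -DdataFile value, so multiple input
+          // files are passed as one comma-separated list.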
"" : ",") + args[i]; + } + } + temp.add("-DdataFile=" + files); temp.add("-b"); String benchmarkNames = "serializeToByteString,serializeToByteArray,serializeToMemoryStream" + ",deserializeFromByteString,deserializeFromByteArray,deserializeFromMemoryStream"; @@ -165,14 +111,4 @@ public class ProtoBench { return temp; } - - public static byte[] readAllBytes(String filename) throws IOException { - if (filename.equals("")) { - return new byte[0]; - } - RandomAccessFile file = new RandomAccessFile(new File(filename), "r"); - byte[] content = new byte[(int) file.length()]; - file.readFully(content); - return content; - } } diff --git a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java index b50c6585..94568aea 100755 --- a/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java +++ b/benchmarks/java/src/main/java/com/google/protobuf/ProtoCaliperBenchmark.java @@ -99,7 +99,6 @@ public class ProtoCaliperBenchmark { private List inputStreamList; private List inputStringList; private List sampleMessageList; - private long counter; private BenchmarkMessageType getMessageType() throws IOException { if (benchmarkDataset.getMessageName().equals("benchmarks.proto3.GoogleMessage1")) { @@ -149,8 +148,6 @@ public class ProtoCaliperBenchmark { sampleMessageList.add( defaultMessage.newBuilderForType().mergeFrom(singleInputData, extensions).build()); } - - counter = 0; } @@ -160,8 +157,9 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - sampleMessageList.get((int) (counter % sampleMessageList.size())).toByteString(); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + sampleMessageList.get(j).toByteString(); + } } } @@ -171,8 +169,9 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - sampleMessageList.get((int) (counter % sampleMessageList.size())).toByteArray(); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + sampleMessageList.get(j).toByteArray(); + } } } @@ -182,9 +181,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - ByteArrayOutputStream output = new ByteArrayOutputStream(); - sampleMessageList.get((int) (counter % sampleMessageList.size())).writeTo(output); - counter++; + for (int j = 0; j < sampleMessageList.size(); j++) { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + sampleMessageList.get(j).writeTo(output); + } } } @@ -194,9 +194,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputStringList.get((int) (counter % inputStringList.size())), extensions); - counter++; + for (int j = 0; j < inputStringList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputStringList.get(j), extensions); + } } } @@ -206,9 +207,10 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputDataList.get((int) (counter % inputDataList.size())), extensions); - counter++; + for (int j = 0; j < inputDataList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputDataList.get(j), extensions); + } } } @@ -218,27 +220,11 @@ public class ProtoCaliperBenchmark { return; } for (int i = 0; i < reps; i++) { - 
benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( - inputStreamList.get((int) (counter % inputStreamList.size())), extensions); - inputStreamList.get((int) (counter % inputStreamList.size())).reset(); - counter++; - } - } - - @AfterExperiment - void checkCounter() throws IOException { - if (counter == 1) { - // Dry run - return; - } - if (benchmarkDataset.getPayloadCount() != 1 - && counter < benchmarkDataset.getPayloadCount() * 10L) { - BufferedWriter writer = new BufferedWriter(new FileWriter("JavaBenchmarkWarning.txt", true)); - // If the total number of non-warmup reps is smaller than 100 times of the total number of - // datasets, then output the scale that need to multiply to the configuration (either extend - // the running time for one timingInterval or run for more measurements). - writer.append(1.0 * benchmarkDataset.getPayloadCount() * 10L / counter + " "); - writer.close(); + for (int j = 0; j < inputStreamList.size(); j++) { + benchmarkMessageType.getDefaultInstance().getParserForType().parseFrom( + inputStreamList.get(j), extensions); + inputStreamList.get(j).reset(); + } } } } diff --git a/benchmarks/py_benchmark.py b/benchmarks/py_benchmark.py deleted file mode 100755 index ba7a3470..00000000 --- a/benchmarks/py_benchmark.py +++ /dev/null @@ -1,118 +0,0 @@ -import sys -import os -import timeit -import math -import fnmatch - -# BEGIN CPP GENERATED MESSAGE -# CPP generated code must be linked before importing the generated Python code -# for the descriptor can be found in the pool -if len(sys.argv) < 2: - raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code") -if sys.argv[1] == "true": - sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" ) - import libbenchmark_messages - sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" ) -elif sys.argv[1] != "false": - raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code") -# END CPP GENERATED MESSAGE - -import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2 -import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2 -import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2 -import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2 -import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2 -import benchmarks_pb2 as benchmarks_pb2 - - -def run_one_test(filename): - data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read() - benchmark_dataset = benchmarks_pb2.BenchmarkDataset() - benchmark_dataset.ParseFromString(data) - benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload), - module="py_benchmark", - setup_method="init") - print "Message %s of dataset file %s" % \ - (benchmark_dataset.message_name, filename) - benchmark_util.set_test_method("parse_from_benchmark") - print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) - benchmark_util.set_test_method("serialize_to_benchmark") - print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) - print "" - -def init(filename): - global benchmark_dataset, message_class, message_list, counter - message_list=[] - counter = 0 - data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read() - benchmark_dataset = benchmarks_pb2.BenchmarkDataset() - benchmark_dataset.ParseFromString(data) - - 
if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1": - message_class = benchmark_message1_proto3_pb2.GoogleMessage1 - elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1": - message_class = benchmark_message1_proto2_pb2.GoogleMessage1 - elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2": - message_class = benchmark_message2_pb2.GoogleMessage2 - elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3": - message_class = benchmark_message3_pb2.GoogleMessage3 - elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4": - message_class = benchmark_message4_pb2.GoogleMessage4 - else: - raise IOError("Message %s not found!" % (benchmark_dataset.message_name)) - - for one_payload in benchmark_dataset.payload: - temp = message_class() - temp.ParseFromString(one_payload) - message_list.append(temp) - -def parse_from_benchmark(): - global counter, message_class, benchmark_dataset - m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)]) - counter = counter + 1 - -def serialize_to_benchmark(): - global counter, message_list, message_class - s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString() - counter = counter + 1 - - -class Benchmark: - def __init__(self, module=None, test_method=None, - setup_method=None, full_iteration = 1): - self.full_iteration = full_iteration - self.module = module - self.test_method = test_method - self.setup_method = setup_method - - def set_test_method(self, test_method): - self.test_method = test_method - - def full_setup_code(self, setup_method_args=''): - setup_code = "" - setup_code += "from %s import %s\n" % (self.module, self.test_method) - setup_code += "from %s import %s\n" % (self.module, self.setup_method) - setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args) - return setup_code - - def dry_run(self, test_method_args='', setup_method_args=''): - return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), - setup=self.full_setup_code(setup_method_args), - number=self.full_iteration); - - def run_benchmark(self, test_method_args='', setup_method_args=''): - reps = self.full_iteration; - t = self.dry_run(test_method_args, setup_method_args); - if t < 3 : - reps = int(math.ceil(3 / t)) * self.full_iteration - t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), - setup=self.full_setup_code(setup_method_args), - number=reps); - return "Average time for %s: %.2f ns" % \ - (self.test_method, 1.0 * t / reps * (10 ** 9)) - - -if __name__ == "__main__": - for i in range(2, len(sys.argv)): - run_one_test(sys.argv[i]) - diff --git a/benchmarks/python/py_benchmark.py b/benchmarks/python/py_benchmark.py new file mode 100755 index 00000000..a551ba6d --- /dev/null +++ b/benchmarks/python/py_benchmark.py @@ -0,0 +1,150 @@ +import sys +import os +import timeit +import math +import argparse +import fnmatch +import json + +parser = argparse.ArgumentParser(description="Python protobuf benchmark") +parser.add_argument("data_files", metavar="dataFile", nargs="+", + help="testing data files.") +parser.add_argument("--json", action="store_const", dest="json", + const="yes", default="no", + help="Whether to output json results") +parser.add_argument("--behavior_prefix", dest="behavior_prefix", + help="The output json format's behavior's name's prefix", + default="") +# BEGIN CPP GENERATED MESSAGE +parser.add_argument("--cpp_generated", 
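+                    # When set, the C++ extension module is imported before the
+                    # generated Python modules below, so the C++ descriptors are
+                    # registered in the default descriptor pool first.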
action="store_const", + dest="cpp_generated", const="yes", default="no", + help="Whether to link generated code library") +# END CPP GENERATED MESSAGE +args = parser.parse_args() +# BEGIN CPP GENERATED MESSAGE +# CPP generated code must be linked before importing the generated Python code +# for the descriptor can be found in the pool +if args.cpp_generated != "no": + sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" ) + import libbenchmark_messages + sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" ) +# END CPP GENERATED MESSAGE + + +import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2 +import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2 +import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2 +import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2 +import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2 +import benchmarks_pb2 as benchmarks_pb2 + + +def run_one_test(filename): + data = open(filename).read() + benchmark_dataset = benchmarks_pb2.BenchmarkDataset() + benchmark_dataset.ParseFromString(data) + benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload), + module="py_benchmark", + setup_method="init") + result={} + result["filename"] = filename + result["message_name"] = benchmark_dataset.message_name + result["benchmarks"] = {} + benchmark_util.set_test_method("parse_from_benchmark") + result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"] = \ + benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) + benchmark_util.set_test_method("serialize_to_benchmark") + result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"] = \ + benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) + return result + + +def init(filename): + global benchmark_dataset, message_class, message_list, counter + message_list=[] + counter = 0 + data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read() + benchmark_dataset = benchmarks_pb2.BenchmarkDataset() + benchmark_dataset.ParseFromString(data) + + if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1": + message_class = benchmark_message1_proto3_pb2.GoogleMessage1 + elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1": + message_class = benchmark_message1_proto2_pb2.GoogleMessage1 + elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2": + message_class = benchmark_message2_pb2.GoogleMessage2 + elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3": + message_class = benchmark_message3_pb2.GoogleMessage3 + elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4": + message_class = benchmark_message4_pb2.GoogleMessage4 + else: + raise IOError("Message %s not found!" 
% (benchmark_dataset.message_name)) + + for one_payload in benchmark_dataset.payload: + temp = message_class() + temp.ParseFromString(one_payload) + message_list.append(temp) + + +def parse_from_benchmark(): + global counter, message_class, benchmark_dataset + m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)]) + counter = counter + 1 + + +def serialize_to_benchmark(): + global counter, message_list, message_class + s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString() + counter = counter + 1 + + +class Benchmark: + def __init__(self, module=None, test_method=None, + setup_method=None, full_iteration = 1): + self.full_iteration = full_iteration + self.module = module + self.test_method = test_method + self.setup_method = setup_method + + def set_test_method(self, test_method): + self.test_method = test_method + + def full_setup_code(self, setup_method_args=''): + setup_code = "" + setup_code += "from %s import %s\n" % (self.module, self.test_method) + setup_code += "from %s import %s\n" % (self.module, self.setup_method) + setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args) + return setup_code + + def dry_run(self, test_method_args='', setup_method_args=''): + return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), + setup=self.full_setup_code(setup_method_args), + number=self.full_iteration); + + def run_benchmark(self, test_method_args='', setup_method_args=''): + reps = self.full_iteration; + t = self.dry_run(test_method_args, setup_method_args); + if t < 3 : + reps = int(math.ceil(3 / t)) * self.full_iteration + t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), + setup=self.full_setup_code(setup_method_args), + number=reps); + return 1.0 * t / reps * (10 ** 9) + + +if __name__ == "__main__": + results = [] + for file in args.data_files: + results.append(run_one_test(file)) + + if args.json != "no": + print json.dumps(results) + else: + for result in results: + print "Message %s of dataset file %s" % \ + (result["message_name"], result["filename"]) + print "Average time for parse_from_benchmark: %.2f ns" % \ + (result["benchmarks"]["parse_from_benchmark"]) + print "Average time for serialize_to_benchmark: %.2f ns" % \ + (result["benchmarks"]["serialize_to_benchmark"]) + print "" diff --git a/benchmarks/python/python_benchmark_messages.cc b/benchmarks/python/python_benchmark_messages.cc new file mode 100644 index 00000000..ded16fe9 --- /dev/null +++ b/benchmarks/python/python_benchmark_messages.cc @@ -0,0 +1,29 @@ +#include + +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" + +static PyMethodDef python_benchmark_methods[] = { + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + +PyMODINIT_FUNC +initlibbenchmark_messages() { + benchmarks::BenchmarkDataset().descriptor(); + benchmarks::proto3::GoogleMessage1().descriptor(); + benchmarks::proto2::GoogleMessage1().descriptor(); + benchmarks::proto2::GoogleMessage2().descriptor(); + benchmarks::google_message3::GoogleMessage3().descriptor(); + benchmarks::google_message4::GoogleMessage4().descriptor(); + + PyObject *m; + + m = Py_InitModule("libbenchmark_messages", python_benchmark_methods); + if (m == NULL) + 
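+    // Py_InitModule has already recorded the failure in the Python error
+    // state, so the init function simply returns without a module.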
+
+
+if __name__ == "__main__":
+  results = []
+  for file in args.data_files:
+    results.append(run_one_test(file))
+
+  if args.json != "no":
+    print json.dumps(results)
+  else:
+    for result in results:
+      print "Message %s of dataset file %s" % \
+          (result["message_name"], result["filename"])
+      print "Average time for parse_from_benchmark: %.2f ns" % \
+          (result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"])
+      print "Average time for serialize_to_benchmark: %.2f ns" % \
+          (result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"])
+      print ""
diff --git a/benchmarks/python/python_benchmark_messages.cc b/benchmarks/python/python_benchmark_messages.cc
new file mode 100644
index 00000000..ded16fe9
--- /dev/null
+++ b/benchmarks/python/python_benchmark_messages.cc
@@ -0,0 +1,29 @@
+#include <Python.h>
+
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+
+static PyMethodDef python_benchmark_methods[] = {
+  {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+
+PyMODINIT_FUNC
+initlibbenchmark_messages() {
+  // Touching each generated message registers its descriptor in the
+  // default descriptor pool shared with the Python bindings.
+  benchmarks::BenchmarkDataset().descriptor();
+  benchmarks::proto3::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage2().descriptor();
+  benchmarks::google_message3::GoogleMessage3().descriptor();
+  benchmarks::google_message4::GoogleMessage4().descriptor();
+
+  PyObject *m;
+
+  m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
+  if (m == NULL)
+    return;
+}
diff --git a/benchmarks/python_benchmark_messages.cc b/benchmarks/python_benchmark_messages.cc
deleted file mode 100644
index 55242a2a..00000000
--- a/benchmarks/python_benchmark_messages.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <Python.h>
-
-#include "benchmarks.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
-#include "datasets/google_message2/benchmark_message2.pb.h"
-#include "datasets/google_message3/benchmark_message3.pb.h"
-#include "datasets/google_message4/benchmark_message4.pb.h"
-
-static PyMethodDef python_benchmark_methods[] = {
-  {NULL, NULL, 0, NULL}        /* Sentinel */
-};
-
-
-PyMODINIT_FUNC
-initlibbenchmark_messages() {
-  benchmarks::BenchmarkDataset().descriptor();
-  benchmarks::proto3::GoogleMessage1().descriptor();
-  benchmarks::proto2::GoogleMessage1().descriptor();
-  benchmarks::proto2::GoogleMessage2().descriptor();
-  benchmarks::google_message3::GoogleMessage3().descriptor();
-  benchmarks::google_message4::GoogleMessage4().descriptor();
-
-  PyObject *m;
-
-  m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
-  if (m == NULL)
-    return;
-}
diff --git a/benchmarks/util/__init__.py b/benchmarks/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/util/big_query_utils.py b/benchmarks/util/big_query_utils.py
new file mode 100755
index 00000000..14105aa6
--- /dev/null
+++ b/benchmarks/util/big_query_utils.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python2.7
+
+import argparse
+import json
+import uuid
+import httplib2
+
+from apiclient import discovery
+from apiclient.errors import HttpError
+from oauth2client.client import GoogleCredentials
+
+# 30 days in milliseconds
+_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
+NUM_RETRIES = 3
+
+
+def create_big_query():
+    """Authenticates with Cloud Platform and gets a BigQuery service object."""
+    creds = GoogleCredentials.get_application_default()
+    return discovery.build(
+        'bigquery', 'v2', credentials=creds, cache_discovery=False)
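+
+
+# Typical usage, assuming Application Default Credentials are available
+# (e.g. via the GOOGLE_APPLICATION_CREDENTIALS environment variable):
+#
+#   bq = create_big_query()
+#   create_dataset(bq, 'my-project', 'my_dataset')   # names are illustrative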
+
+
+def create_dataset(big_query, project_id, dataset_id):
+    is_success = True
+    body = {
+        'datasetReference': {
+            'projectId': project_id,
+            'datasetId': dataset_id
+        }
+    }
+
+    try:
+        dataset_req = big_query.datasets().insert(
+            projectId=project_id, body=body)
+        dataset_req.execute(num_retries=NUM_RETRIES)
+    except HttpError as http_error:
+        if http_error.resp.status == 409:
+            print 'Warning: The dataset %s already exists' % dataset_id
+        else:
+            # Note: For more debugging info, print "http_error.content"
+            print 'Error in creating dataset: %s. Err: %s' % (dataset_id,
                                                               http_error)
+            is_success = False
+    return is_success
+
+
+# table_schema is a list of (column name, column type, description) tuples.
+def create_table(big_query, project_id, dataset_id, table_id, table_schema,
+                 description):
+    fields = [{
+        'name': field_name,
+        'type': field_type,
+        'description': field_description
+    } for (field_name, field_type, field_description) in table_schema]
+    return create_table2(big_query, project_id, dataset_id, table_id, fields,
+                         description)
+
+
+def create_partitioned_table(big_query,
+                             project_id,
+                             dataset_id,
+                             table_id,
+                             table_schema,
+                             description,
+                             partition_type='DAY',
+                             expiration_ms=_EXPIRATION_MS):
+    """Creates a partitioned table. By default, a date-partitioned table is
+    created, and each partition expires 30 days after it was last modified.
+    """
+    fields = [{
+        'name': field_name,
+        'type': field_type,
+        'description': field_description
+    } for (field_name, field_type, field_description) in table_schema]
+    return create_table2(big_query, project_id, dataset_id, table_id, fields,
+                         description, partition_type, expiration_ms)
+
+
+def create_table2(big_query,
+                  project_id,
+                  dataset_id,
+                  table_id,
+                  fields_schema,
+                  description,
+                  partition_type=None,
+                  expiration_ms=None):
+    is_success = True
+
+    body = {
+        'description': description,
+        'schema': {
+            'fields': fields_schema
+        },
+        'tableReference': {
+            'datasetId': dataset_id,
+            'projectId': project_id,
+            'tableId': table_id
+        }
+    }
+
+    if partition_type and expiration_ms:
+        body["timePartitioning"] = {
+            "type": partition_type,
+            "expirationMs": expiration_ms
+        }
+
+    try:
+        table_req = big_query.tables().insert(
+            projectId=project_id, datasetId=dataset_id, body=body)
+        res = table_req.execute(num_retries=NUM_RETRIES)
+        print 'Successfully created %s "%s"' % (res['kind'], res['id'])
+    except HttpError as http_error:
+        if http_error.resp.status == 409:
+            print 'Warning: Table %s already exists' % table_id
+        else:
+            print 'Error in creating table: %s. Err: %s' % (table_id,
+                                                            http_error)
+            is_success = False
+    return is_success
+
+
+def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
+    is_success = True
+
+    body = {
+        'schema': {
+            'fields': fields_schema
+        },
+        'tableReference': {
+            'datasetId': dataset_id,
+            'projectId': project_id,
+            'tableId': table_id
+        }
+    }
+
+    try:
+        table_req = big_query.tables().patch(
+            projectId=project_id,
+            datasetId=dataset_id,
+            tableId=table_id,
+            body=body)
+        res = table_req.execute(num_retries=NUM_RETRIES)
+        print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
+    except HttpError as http_error:
+        print 'Error in patching table: %s. Err: %s' % (table_id, http_error)
+        is_success = False
+    return is_success
+
+
+def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
+    is_success = True
+    body = {'rows': rows_list}
+    try:
+        insert_req = big_query.tabledata().insertAll(
+            projectId=project_id,
+            datasetId=dataset_id,
+            tableId=table_id,
+            body=body)
+        res = insert_req.execute(num_retries=NUM_RETRIES)
+        if res.get('insertErrors', None):
+            print 'Error inserting rows! Response: %s' % res
+            is_success = False
+    except HttpError as http_error:
+        print 'Error inserting rows to the table %s: %s' % (table_id,
+                                                            http_error)
+        is_success = False
+
+    return is_success
+
+
+def sync_query_job(big_query, project_id, query, timeout=5000):
+    query_data = {'query': query, 'timeoutMs': timeout}
+    query_job = None
+    try:
+        query_job = big_query.jobs().query(
+            projectId=project_id,
+            body=query_data).execute(num_retries=NUM_RETRIES)
+    except HttpError as http_error:
+        print 'Query execute job failed with error: %s' % http_error
+        print http_error.content
+    return query_job
+
+
+def make_row(unique_row_id, row_values_dict):
+    """row_values_dict is a dictionary mapping column names to column values.
+    """
+    return {'insertId': unique_row_id, 'json': row_values_dict}
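+
+
+# End-to-end example of inserting a single row (all names illustrative):
+#
+#   bq = create_big_query()
+#   row = make_row(str(uuid.uuid4()),
+#                  {'language': 'cpp', 'throughput': 512.0})
+#   insert_rows(bq, 'my-project', 'my_dataset', 'my_table', [row])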
diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py
new file mode 100755
index 00000000..ae22a668
--- /dev/null
+++ b/benchmarks/util/run_and_upload.py
@@ -0,0 +1,290 @@
+import argparse
+import os
+import re
+import copy
+import uuid
+import calendar
+import time
+import big_query_utils
+import datetime
+import json
+# This import depends on the automake rule protoc_middleman; please make sure
+# protoc_middleman has been built before running this file.
+import os.path, sys
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import tmp.benchmarks_pb2 as benchmarks_pb2
+
+_PROJECT_ID = 'grpc-testing'
+_DATASET = 'protobuf_benchmark_result'
+_TABLE = 'opensource_result_v1'
+_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
+                       datetime.datetime.now().month,
+                       datetime.datetime.now().day)
+
+file_size_map = {}
+
+def get_data_size(file_name):
+  if file_name in file_size_map:
+    return file_size_map[file_name]
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(
+      open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name, "rb").read())
+  size = 0
+  count = 0
+  for payload in benchmark_dataset.payload:
+    size += len(payload)
+    count += 1
+  file_size_map[file_name] = (size, 1.0 * size / count)
+  return file_size_map[file_name]
+
+
+def extract_file_name(file_name):
+  name_list = re.split("[/\.]", file_name)
+  short_file_name = ""
+  for name in name_list:
+    if name.startswith("google_message"):
+      short_file_name = name
+  return short_file_name
+
+
+cpp_result = []
+python_result = []
+java_result = []
+go_result = []
+
+
+# C++ results example:
+# {
+#   "benchmarks": [
+#     {
+#       "bytes_per_second": int,
+#       "cpu_time": int,
+#       "name": string,
+#       "time_unit": string,
+#       ...
+#     },
+#     ...
+#   ],
+#   ...
+# }
+def parse_cpp_result(filename):
+  global cpp_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for benchmark in results["benchmarks"]:
+      data_filename = re.split("(_parse_|_serialize)", benchmark["name"])[0]
+      behavior = benchmark["name"][len(data_filename) + 1:]
+      cpp_result.append({
+          "language": "cpp",
+          "dataFileName": data_filename,
+          "behavior": behavior,
+          "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
+      })
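+
+
+# All throughput values below are normalized to MiB/s: the C++ runner already
+# reports bytes_per_second (so 1,073,741,824 B/s becomes 1024.0 MiB/s after
+# dividing by 2**20), while for the other languages the expression
+# avg_size / time_ns * 1e9 / 2**20 converts bytes-per-nanosecond to MiB/s.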
+
+
+# Python results example:
+# [
+#   [
+#     {
+#       "filename": string,
+#       "benchmarks": {
+#         behavior: results,
+#         ...
+#       },
+#       "message_name": string
+#     },
+#     ...
+#   ],  # pure-python
+#   ...
+# ]
+def parse_python_result(filename):
+  global python_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results_list = json.loads(f.read())
+    for results in results_list:
+      for result in results:
+        _, avg_size = get_data_size(result["filename"])
+        for behavior in result["benchmarks"]:
+          python_result.append({
+              "language": "python",
+              "dataFileName": extract_file_name(result["filename"]),
+              "behavior": behavior,
+              "throughput": avg_size /
+                            result["benchmarks"][behavior] * 1e9 / 2 ** 20
+          })
+
+
+# Java results example:
+# [
+#   {
+#     "id": string,
+#     "instrumentSpec": {...},
+#     "measurements": [
+#       {
+#         "weight": float,
+#         "value": {
+#           "magnitude": float,
+#           "unit": string
+#         },
+#         ...
+#       },
+#       ...
+#     ],
+#     "run": {...},
+#     "scenario": {
+#       "benchmarkSpec": {
+#         "methodName": string,
+#         "parameters": {
+#           defined parameters in the benchmark: parameters value
+#         },
+#         ...
+#       },
+#       ...
+#     }
+#   },
+#   ...
+# ]
+def parse_java_result(filename):
+  global java_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for result in results:
+      total_weight = 0
+      total_value = 0
+      for measurement in result["measurements"]:
+        total_weight += measurement["weight"]
+        total_value += measurement["value"]["magnitude"]
+      avg_time = total_value * 1.0 / total_weight
+      total_size, _ = get_data_size(
+          result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      java_result.append({
+          "language": "java",
+          "throughput": total_size / avg_time * 1e9 / 2 ** 20,
+          "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
+          "dataFileName": extract_file_name(
+              result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      })
+
+
+# Go benchmark results example:
+#
+# goos: linux
+# goarch: amd64
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12    3000    705784 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12      2000    634648 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12         5000    244174 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12         300   4120954 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12         300   4108632 ns/op
+# PASS
+# ok  _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
+def parse_go_result(filename):
+  global go_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    for line in f:
+      result_list = re.split("[\ \t]+", line)
+      if result_list[0][:9] != "Benchmark":
+        continue
+      first_slash_index = result_list[0].find('/')
+      last_slash_index = result_list[0].rfind('/')
+      # Drop the leading "Benchmark/.." so the remaining "./datasets/..."
+      # path can be resolved relative to the benchmarks directory.
+      full_filename = result_list[0][first_slash_index+4:last_slash_index]
+      total_bytes, _ = get_data_size(full_filename)
+      behavior_with_suffix = result_list[0][last_slash_index+1:]
+      last_dash = behavior_with_suffix.rfind("-")
+      if last_dash == -1:
+        behavior = behavior_with_suffix
+      else:
+        behavior = behavior_with_suffix[:last_dash]
+      go_result.append({
+          "dataFileName": extract_file_name(full_filename),
+          "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
+          "behavior": behavior,
+          "language": "go"
+      })
+
+
+def get_metadata():
+  build_number = os.getenv('BUILD_NUMBER')
+  build_url = os.getenv('BUILD_URL')
+  job_name = os.getenv('JOB_NAME')
+  git_commit = os.getenv('GIT_COMMIT')
+  # ghprbActualCommit is the actual head commit of the pull request being
+  # tested.
+  git_actual_commit = os.getenv('ghprbActualCommit')
+
+  utc_timestamp = str(calendar.timegm(time.gmtime()))
+  metadata = {'created': utc_timestamp}
+
+  if build_number:
+    metadata['buildNumber'] = build_number
+  if build_url:
+    metadata['buildUrl'] = build_url
+  if job_name:
+    metadata['jobName'] = job_name
+  if git_commit:
+    metadata['gitCommit'] = git_commit
+  if git_actual_commit:
+    metadata['gitActualCommit'] = git_actual_commit
+
+  return metadata
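+
+
+# On a CI run where BUILD_NUMBER, BUILD_URL, JOB_NAME, GIT_COMMIT and
+# ghprbActualCommit are set (e.g. Jenkins), the metadata might look like
+# this (illustrative values):
+#
+#   {'created': '1523404800', 'buildNumber': '42',
+#    'buildUrl': 'https://ci.example.com/job/protobuf-benchmark/42/',
+#    'jobName': 'protobuf-benchmark', 'gitCommit': 'abc123...'}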
+
+
+def upload_result(result_list, metadata):
+  # Create the BigQuery client once and reuse it for every row.
+  bq = big_query_utils.create_big_query()
+  for result in result_list:
+    new_result = copy.deepcopy(result)
+    new_result['metadata'] = metadata
+    row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
+    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
+                                       _TABLE + "$" + _NOW,
+                                       [row]):
+      print 'Error when uploading result', new_result
+
+
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser()
+  parser.add_argument("-cpp", "--cpp_input_file",
+                      help="The C++ benchmark result file's name",
+                      default="")
+  parser.add_argument("-java", "--java_input_file",
+                      help="The Java benchmark result file's name",
+                      default="")
+  parser.add_argument("-python", "--python_input_file",
+                      help="The Python benchmark result file's name",
+                      default="")
+  parser.add_argument("-go", "--go_input_file",
+                      help="The Go benchmark result file's name",
+                      default="")
+  args = parser.parse_args()
+
+  parse_cpp_result(args.cpp_input_file)
+  parse_python_result(args.python_input_file)
+  parse_java_result(args.java_input_file)
+  parse_go_result(args.go_input_file)
+
+  metadata = get_metadata()
+  print "uploading cpp results..."
+  upload_result(cpp_result, metadata)
+  print "uploading java results..."
+  upload_result(java_result, metadata)
+  print "uploading python results..."
+  upload_result(python_result, metadata)
+  print "uploading go results..."
+  upload_result(go_result, metadata)
-- 
cgit v1.2.3