aboutsummaryrefslogtreecommitdiffhomepage
path: root/benchmarks
diff options
context:
space:
mode:
authorGravatar Josh Haberman <jhaberman@gmail.com>2016-04-27 18:22:22 -0700
committerGravatar Josh Haberman <jhaberman@gmail.com>2016-04-27 18:22:22 -0700
commit2e83110230b7e91b07835e9c718a1d6fbcb8b617 (patch)
tree85737c7424dab1c232d95665c584d1a69fd2f992 /benchmarks
parentf53f911793c3024976f80211e0c976f5cc51f88d (diff)
Added framework for generating/consuming benchmarking data sets.
This takes the code that was sitting in benchmarks/ already and makes it easier for language-specific benchmarks to consume. Future PRs will enhance this so that the language-specific benchmarks can report metrics back that will be tracked over time in PerfKit.
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/Makefile.am75
-rw-r--r--benchmarks/benchmark_messages_proto2.proto (renamed from benchmarks/google_speed.proto)19
-rw-r--r--benchmarks/benchmark_messages_proto3.proto76
-rw-r--r--benchmarks/benchmarks.proto102
-rw-r--r--benchmarks/generate_datasets.cc114
5 files changed, 378 insertions, 8 deletions
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
new file mode 100644
index 00000000..79581ee9
--- /dev/null
+++ b/benchmarks/Makefile.am
@@ -0,0 +1,75 @@
+
+benchmarks_protoc_inputs = \
+ benchmarks.proto \
+ benchmark_messages_proto3.proto
+
+benchmarks_protoc_inputs_proto2 = \
+ benchmark_messages_proto2.proto
+
+benchmarks_protoc_outputs = \
+ benchmarks.pb.cc \
+ benchmarks.pb.h \
+ benchmark_messages_proto3.pb.cc \
+ benchmark_messages_proto3.pb.h
+
+benchmarks_protoc_outputs_proto2 = \
+ benchmark_messages_proto2.pb.cc \
+ benchmark_messages_proto2.pb.h
+
+bin_PROGRAMS = generate-datasets
+
+generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
+generate_datasets_SOURCES = generate_datasets.cc
+generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
+nodist_generate_datasets_SOURCES = \
+ google_message1.h \
+ google_message2.h \
+ $(benchmarks_protoc_outputs) \
+ $(benchmarks_protoc_outputs_proto2)
+
+# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
+# so a direct "make generate-datasets" could fail if parallel enough.
+generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h
+
+$(benchmarks_protoc_outputs): protoc_middleman
+$(benchmarks_protoc_outputs_proto2): protoc_middleman2
+
+google_message1.h: google_message1.dat
+ xxd -i $< $@
+
+google_message2.h: google_message2.dat
+ xxd -i $< $@
+
+CLEANFILES = \
+ $(benchmarks_protoc_outputs) \
+ $(benchmarks_protoc_outputs_proto2) \
+ google_message1.h \
+ google_message2.h \
+ protoc_middleman \
+ protoc_middleman2 \
+ dataset.*
+
+if USE_EXTERNAL_PROTOC
+
+protoc_middleman: $(benchmarks_protoc_inputs)
+ $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
+ touch protoc_middleman
+
+protoc_middleman2: $(benchmarks_protoc_inputs_proto2)
+ $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
+ touch protoc_middleman2
+
+else
+
+# We have to cd to $(srcdir) before executing protoc because
+# $(benchmarks_protoc_inputs) and $(benchmarks_protoc_inputs_proto2) are
+# relative to srcdir, which may not be the same as the current directory when
+# building out-of-tree.
+protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
+ oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
+ touch protoc_middleman
+
+# Stamp rule for the proto2 benchmark messages. The recipe must touch its own
+# stamp file (protoc_middleman2): touching protoc_middleman instead would leave
+# protoc_middleman2 permanently missing, so this rule would re-run on every
+# build and would spuriously refresh the other rule's stamp.
+protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
+ oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
+ touch protoc_middleman2
+
+endif
diff --git a/benchmarks/google_speed.proto b/benchmarks/benchmark_messages_proto2.proto
index 16f6d678..c7103be5 100644
--- a/benchmarks/google_speed.proto
+++ b/benchmarks/benchmark_messages_proto2.proto
@@ -1,11 +1,14 @@
+// Benchmark messages for proto2.
+
syntax = "proto2";
-package benchmarks;
+package benchmarks.p2;
+option java_package = "com.google.protobuf.benchmarks";
-option java_outer_classname = "GoogleSpeed";
+// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
-message SpeedMessage1 {
+message GoogleMessage1 {
required string field1 = 1;
optional string field9 = 9;
optional string field18 = 18;
@@ -40,7 +43,7 @@ message SpeedMessage1 {
optional int32 field23 = 23 [default=0];
optional bool field24 = 24 [default=false];
optional int32 field25 = 25 [default=0];
- optional SpeedMessage1SubMessage field15 = 15;
+ optional GoogleMessage1SubMessage field15 = 15;
optional bool field78 = 78;
optional int32 field67 = 67 [default=0];
optional int32 field68 = 68;
@@ -49,7 +52,7 @@ message SpeedMessage1 {
optional int32 field131 = 131 [default=0];
}
-message SpeedMessage1SubMessage {
+message GoogleMessage1SubMessage {
optional int32 field1 = 1 [default=0];
optional int32 field2 = 2 [default=0];
optional int32 field3 = 3 [default=0];
@@ -72,7 +75,7 @@ message SpeedMessage1SubMessage {
optional uint64 field300 = 300;
}
-message SpeedMessage2 {
+message GoogleMessage2 {
optional string field1 = 1;
optional int64 field3 = 3;
optional int64 field4 = 4;
@@ -112,7 +115,7 @@ message SpeedMessage2 {
repeated int32 field73 = 73;
optional int32 field20 = 20 [default=0];
optional string field24 = 24;
- optional SpeedMessage2GroupedMessage field31 = 31;
+ optional GoogleMessage2GroupedMessage field31 = 31;
}
repeated string field128 = 128;
optional int64 field131 = 131;
@@ -123,7 +126,7 @@ message SpeedMessage2 {
optional bool field206 = 206 [default=false];
}
-message SpeedMessage2GroupedMessage {
+message GoogleMessage2GroupedMessage {
optional float field1 = 1;
optional float field2 = 2;
optional float field3 = 3 [default=0.0];
diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto
new file mode 100644
index 00000000..4ea39c22
--- /dev/null
+++ b/benchmarks/benchmark_messages_proto3.proto
@@ -0,0 +1,76 @@
+// Benchmark messages for proto3.
+
+syntax = "proto3";
+
+package benchmarks.p3;
+option java_package = "com.google.protobuf.benchmarks";
+
+// This is the default, but we specify it here explicitly.
+option optimize_for = SPEED;
+
+message GoogleMessage1 {  // generate_datasets.cc parses the google_message1 payload as this type.
+ string field1 = 1;
+ string field9 = 9;
+ string field18 = 18;
+ bool field80 = 80;
+ bool field81 = 81;
+ int32 field2 = 2;
+ int32 field3 = 3;
+ int32 field280 = 280;
+ int32 field6 = 6;
+ int64 field22 = 22;
+ string field4 = 4;
+ repeated fixed64 field5 = 5;  // The only repeated field in this message.
+ bool field59 = 59;
+ string field7 = 7;
+ int32 field16 = 16;
+ int32 field130 = 130;
+ bool field12 = 12;
+ bool field17 = 17;
+ bool field13 = 13;
+ bool field14 = 14;
+ int32 field104 = 104;
+ int32 field100 = 100;
+ int32 field101 = 101;
+ string field102 = 102;
+ string field103 = 103;
+ int32 field29 = 29;
+ bool field30 = 30;
+ int32 field60 = 60;
+ int32 field271 = 271;
+ int32 field272 = 272;
+ int32 field150 = 150;
+ int32 field23 = 23;
+ bool field24 = 24;
+ int32 field25 = 25;
+ GoogleMessage1SubMessage field15 = 15;  // The only message-typed field.
+ bool field78 = 78;
+ int32 field67 = 67;
+ int32 field68 = 68;
+ int32 field128 = 128;
+ string field129 = 129;
+ int32 field131 = 131;
+}
+
+message GoogleMessage1SubMessage {  // Type of GoogleMessage1.field15.
+ int32 field1 = 1;
+ int32 field2 = 2;
+ int32 field3 = 3;
+ string field15 = 15;
+ bool field12 = 12;
+ int64 field13 = 13;
+ int64 field14 = 14;
+ int32 field16 = 16;
+ int32 field19 = 19;
+ bool field20 = 20;
+ bool field28 = 28;
+ fixed64 field21 = 21;
+ int32 field22 = 22;
+ bool field23 = 23;
+ bool field206 = 206;
+ fixed32 field203 = 203;
+ int32 field204 = 204;
+ string field205 = 205;
+ uint64 field207 = 207;
+ uint64 field300 = 300;
+}
diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto
new file mode 100644
index 00000000..a891eb9e
--- /dev/null
+++ b/benchmarks/benchmarks.proto
@@ -0,0 +1,102 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+package benchmarks;
+option java_package = "com.google.protobuf.benchmarks";
+
+message BenchmarkDataset {
+ // Name of the benchmark dataset. This should be unique across all datasets.
+ // Should only contain word characters: [a-zA-Z0-9_]
+ string name = 1;
+
+ // Fully-qualified name of the protobuf message for this dataset.
+ // It will be one of the messages defined in the benchmark_messages_*.proto
+ // files. Implementations that do not support reflection can implement this
+ // with an explicit "if/else" chain that lists every possible message defined
+ // in those files.
+ string message_name = 2;
+
+ // The payload(s) for this dataset. They should be parsed or serialized
+ // in sequence, in a loop, i.e.
+ //
+ //   while (!benchmarkDone) {  // Benchmark runner decides when to exit.
+ //     for (i = 0; i < benchmark.payload.length; i++) {
+ //       parse(benchmark.payload[i])
+ //     }
+ //   }
+ //
+ // This is intended to let datasets include a variety of data to provide
+ // potentially more realistic results than just parsing the same message
+ // over and over. A single message parsed repeatedly could yield unusually
+ // good branch prediction performance.
+ repeated bytes payload = 3;
+}
+
+// A benchmark can write out metrics that we will then upload to our metrics
+// database for tracking over time.
+message Metric {
+ // A unique ID for these results. Used for de-duping.
+ string guid = 1;
+
+ // The labels specify exactly what benchmark was run against the dataset.
+ // The specific benchmark suite can decide what these mean, but here are
+ // some common labels that have a predefined meaning:
+ //
+ //   - "dataset": for tests that pertain to a specific dataset.
+ //
+ // For example:
+ //
+ //   # Tests parsing from binary proto string using arenas.
+ //   labels={
+ //     dataset: "testalltypes",
+ //     op: "parse",
+ //     format: "binaryproto",
+ //     input: "string",
+ //     arena: "true"
+ //   }
+ //
+ //   # Tests serializing to JSON string.
+ //   labels={
+ //     dataset: "testalltypes",
+ //     op: "serialize",
+ //     format: "json",
+ //     input: "string"
+ //   }
+ map<string, string> labels = 2;
+
+ // Unit of measurement for the metric:
+ //   - a speed test might be "mb_per_second" or "ops_per_second"
+ //   - a size test might be "kb".
+ string unit = 3;
+
+ // Metric value.
+ double value = 4;
+}
diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc
new file mode 100644
index 00000000..f6f30cd8
--- /dev/null
+++ b/benchmarks/generate_datasets.cc
@@ -0,0 +1,114 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+const char *file_prefix = "dataset.";
+const char *file_suffix = ".pb";
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "benchmarks.pb.h"
+#include "google_message1.h"
+#include "google_message2.h"
+
+using benchmarks::BenchmarkDataset;
+using google::protobuf::Descriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::Message;
+using google::protobuf::MessageFactory;
+
+#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr))
+
+// Names of every dataset written so far; used to reject duplicate names.
+std::set<std::string> names;
+
+// Validates a dataset and writes it, as a serialized BenchmarkDataset, to the
+// file "dataset.<name>.pb" (relative to the current working directory).
+//
+//   name:         dataset name; must not have been passed to an earlier call.
+//   message_name: fully-qualified name of a message type that can be found in
+//                 the generated descriptor pool.
+//   payload:      serialized messages; each one must parse as message_name.
+//                 They are stored in the dataset in the given order.
+//
+// Every failure (duplicate name, unknown message type, unparseable payload,
+// serialization failure, I/O failure) is reported on stderr and aborts the
+// process.
+void WriteFileWithPayloads(const std::string& name,
+                           const std::string& message_name,
+                           const std::vector<std::string>& payload) {
+  if (!names.insert(name).second) {
+    std::cerr << "Duplicate test name: " << name << "\n";
+    abort();
+  }
+
+  // First verify that this message name exists in our set of benchmark
+  // messages and that these payloads are valid for the given message.
+  const Descriptor* d =
+      DescriptorPool::generated_pool()->FindMessageTypeByName(message_name);
+
+  if (!d) {
+    std::cerr << "For dataset " << name << ", no such message: "
+              << message_name << "\n";
+    abort();
+  }
+
+  // New() hands back a heap-allocated message; hold it in a unique_ptr so the
+  // scratch instance is released when this function returns.
+  std::unique_ptr<Message> m(
+      MessageFactory::generated_factory()->GetPrototype(d)->New());
+
+  for (size_t i = 0; i < payload.size(); i++) {
+    if (!m->ParseFromString(payload[i])) {
+      std::cerr << "For dataset " << name << ", payload[" << i << "] fails "
+                << "to parse\n";
+      abort();
+    }
+  }
+
+  BenchmarkDataset dataset;
+  dataset.set_name(name);
+  dataset.set_message_name(message_name);
+  for (size_t i = 0; i < payload.size(); i++) {
+    dataset.add_payload()->assign(payload[i]);
+  }
+
+  std::string serialized;
+  if (!dataset.SerializeToString(&serialized)) {
+    std::cerr << "For dataset " << name << ", failed to serialize\n";
+    abort();
+  }
+
+  // The serialized dataset is raw bytes, so the stream must be opened in
+  // binary mode; a text-mode stream may translate newline bytes on some
+  // platforms and corrupt the output.
+  const std::string fname = file_prefix + name + file_suffix;
+  std::ofstream writer(fname.c_str(), std::ios::out | std::ios::binary);
+  writer << serialized;
+  writer.close();
+
+  // Do not claim success if the file could not be opened or fully written.
+  if (!writer) {
+    std::cerr << "Failed to write dataset: " << fname << "\n";
+    abort();
+  }
+
+  std::cerr << "Wrote dataset: " << fname << "\n";
+}
+
+// Convenience wrapper for a dataset made of exactly one payload: wraps
+// `payload` in a one-element vector and forwards to WriteFileWithPayloads().
+void WriteFile(const std::string& name, const std::string& message_name,
+               const std::string& payload) {
+  const std::vector<std::string> single_payload(1, payload);
+  WriteFileWithPayloads(name, message_name, single_payload);
+}
+
+// Writes one dataset file per (dataset name, message type) pair listed below.
+int main() {
+  // Copy the embedded byte arrays (presumably defined by the xxd-generated
+  // google_message1.h / google_message2.h headers) into strings once.
+  const std::string message1_bytes = ARRAY_TO_STRING(google_message1_dat);
+  const std::string message2_bytes = ARRAY_TO_STRING(google_message2_dat);
+
+  // The same payload is emitted once for each syntax flavour of
+  // GoogleMessage1.
+  WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1",
+            message1_bytes);
+  WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1",
+            message1_bytes);
+
+  // Not in proto3 because it has a group, which is not supported.
+  WriteFile("google_message2", "benchmarks.p2.GoogleMessage2",
+            message2_bytes);
+
+  return 0;
+}