diff options
Diffstat (limited to 'benchmarks/util')
-rw-r--r-- | benchmarks/util/__init__.py | 0 | ||||
-rwxr-xr-x | benchmarks/util/big_query_utils.py | 189 | ||||
-rw-r--r-- | benchmarks/util/data_proto2_to_proto3_util.h | 64 | ||||
-rw-r--r-- | benchmarks/util/gogo_data_scrubber.cc | 74 | ||||
-rw-r--r-- | benchmarks/util/proto3_data_stripper.cc | 74 | ||||
-rw-r--r-- | benchmarks/util/protoc-gen-gogoproto.cc | 103 | ||||
-rw-r--r-- | benchmarks/util/protoc-gen-proto2_to_proto3.cc | 115 | ||||
-rwxr-xr-x | benchmarks/util/run_and_upload.py | 292 | ||||
-rw-r--r-- | benchmarks/util/schema_proto2_to_proto3_util.h | 194 |
9 files changed, 1105 insertions, 0 deletions
diff --git a/benchmarks/util/__init__.py b/benchmarks/util/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/benchmarks/util/__init__.py diff --git a/benchmarks/util/big_query_utils.py b/benchmarks/util/big_query_utils.py new file mode 100755 index 00000000..aea55bbd --- /dev/null +++ b/benchmarks/util/big_query_utils.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python2.7 + +from __future__ import print_function +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +# 30 days in milliseconds +_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000 +NUM_RETRIES = 3 + + +def create_big_query(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build( + 'bigquery', 'v2', credentials=creds, cache_discovery=False) + + +def create_dataset(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + + try: + dataset_req = biq_query.datasets().insert( + projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print('Warning: The dataset %s already exists' % dataset_id) + else: + # Note: For more debugging info, print "http_error.content" + print('Error in creating dataset: %s. Err: %s' % (dataset_id, + http_error)) + is_success = False + return is_success + + +def create_table(big_query, project_id, dataset_id, table_id, table_schema, + description): + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description) + + +def create_partitioned_table(big_query, + project_id, + dataset_id, + table_id, + table_schema, + description, + partition_type='DAY', + expiration_ms=_EXPIRATION_MS): + """Creates a partitioned table. By default, a date-paritioned table is created with + each partition lasting 30 days after it was last modified. + """ + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description, partition_type, expiration_ms) + + +def create_table2(big_query, + project_id, + dataset_id, + table_id, + fields_schema, + description, + partition_type=None, + expiration_ms=None): + is_success = True + + body = { + 'description': description, + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + if partition_type and expiration_ms: + body["timePartitioning"] = { + "type": partition_type, + "expirationMs": expiration_ms + } + + try: + table_req = big_query.tables().insert( + projectId=project_id, datasetId=dataset_id, body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print('Successfully created %s "%s"' % (res['kind'], res['id'])) + except HttpError as http_error: + if http_error.resp.status == 409: + print('Warning: Table %s already exists' % table_id) + else: + print('Error in creating table: %s. Err: %s' % (table_id, + http_error)) + is_success = False + return is_success + + +def patch_table(big_query, project_id, dataset_id, table_id, fields_schema): + is_success = True + + body = { + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + try: + table_req = big_query.tables().patch( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print('Successfully patched %s "%s"' % (res['kind'], res['id'])) + except HttpError as http_error: + print('Error in creating table: %s. Err: %s' % (table_id, http_error)) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = insert_req.execute(num_retries=NUM_RETRIES) + if res.get('insertErrors', None): + print('Error inserting rows! Response: %s' % res) + is_success = False + except HttpError as http_error: + print('Error inserting rows to the table %s' % table_id) + is_success = False + + return is_success + + +def sync_query_job(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print('Query execute job failed with error: %s' % http_error) + print(http_error.content) + return query_job + + + # List of (column name, column type, description) tuples +def make_row(unique_row_id, row_values_dict): + """row_values_dict is a dictionary of column name and column value. + """ + return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/benchmarks/util/data_proto2_to_proto3_util.h b/benchmarks/util/data_proto2_to_proto3_util.h new file mode 100644 index 00000000..5eea8509 --- /dev/null +++ b/benchmarks/util/data_proto2_to_proto3_util.h @@ -0,0 +1,64 @@ +#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ +#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ + +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.h" + +using google::protobuf::FieldDescriptor; +using google::protobuf::Message; +using google::protobuf::Reflection; + +namespace google { +namespace protobuf { +namespace util { + +class DataStripper { + public: + void StripMessage(Message *message) { + std::vector<const FieldDescriptor*> set_fields; + const Reflection* reflection = message->GetReflection(); + reflection->ListFields(*message, &set_fields); + + for (size_t i = 0; i < set_fields.size(); i++) { + const FieldDescriptor* field = set_fields[i]; + if (ShouldBeClear(field)) { + reflection->ClearField(message, field); + continue; + } + if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + if (field->is_repeated()) { + for (int j = 0; j < reflection->FieldSize(*message, field); j++) { + StripMessage(reflection->MutableRepeatedMessage(message, field, j)); + } + } else { + StripMessage(reflection->MutableMessage(message, field)); + } + } + } + + reflection->MutableUnknownFields(message)->Clear(); + } + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) = 0; +}; + +class GogoDataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP; + } +}; + +class Proto3DataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP || + field->is_extension(); + } +}; + +} // namespace util +} // namespace protobuf +} // namespace google + +#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc new file mode 100644 index 00000000..9ef57b0d --- /dev/null +++ b/benchmarks/util/gogo_data_scrubber.cc @@ -0,0 +1,74 @@ +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" +#include "data_proto2_to_proto3_util.h" + +#include <fstream> + +using google::protobuf::util::GogoDataStripper; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" + << name + << "', please make sure you are running this command from the benchmarks" + << " directory.\n"; + return std::string((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); +} + +int main(int argc, char *argv[]) { + if (argc % 2 == 0 || argc == 1) { + std::cerr << "Usage: [input_files] [output_file_names] where " << + "input_files are one to one mapping to output_file_names." << + std::endl; + return 1; + } + + for (int i = argc / 2; i > 0; i--) { + const std::string &input_file = argv[i]; + const std::string &output_file = argv[i + argc / 2]; + + std::cerr << "Generating " << input_file + << " to " << output_file << std::endl; + benchmarks::BenchmarkDataset dataset; + Message* message; + std::string dataset_payload = ReadFile(input_file); + GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) + << "Can' t parse data file " << input_file; + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + message = new benchmarks::proto3::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + message = new benchmarks::proto2::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + message = new benchmarks::proto2::GoogleMessage2; + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + message = new benchmarks::google_message3::GoogleMessage3; + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + message = new benchmarks::google_message4::GoogleMessage4; + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } + + for (int i = 0; i < dataset.payload_size(); i++) { + message->ParseFromString(dataset.payload(i)); + GogoDataStripper stripper; + stripper.StripMessage(message); + dataset.set_payload(i, message->SerializeAsString()); + } + + std::ofstream ofs(output_file); + ofs << dataset.SerializeAsString(); + ofs.close(); + } + + + return 0; +} diff --git a/benchmarks/util/proto3_data_stripper.cc b/benchmarks/util/proto3_data_stripper.cc new file mode 100644 index 00000000..3096c4c1 --- /dev/null +++ b/benchmarks/util/proto3_data_stripper.cc @@ -0,0 +1,74 @@ +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" +#include "data_proto2_to_proto3_util.h" + +#include <fstream> + +using google::protobuf::util::Proto3DataStripper; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" + << name + << "', please make sure you are running this command from the benchmarks" + << " directory.\n"; + return std::string((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); +} + +int main(int argc, char *argv[]) { + if (argc % 2 == 0 || argc == 1) { + std::cerr << "Usage: [input_files] [output_file_names] where " << + "input_files are one to one mapping to output_file_names." << + std::endl; + return 1; + } + + for (int i = argc / 2; i > 0; i--) { + const std::string &input_file = argv[i]; + const std::string &output_file = argv[i + argc / 2]; + + std::cerr << "Generating " << input_file + << " to " << output_file << std::endl; + benchmarks::BenchmarkDataset dataset; + Message* message; + std::string dataset_payload = ReadFile(input_file); + GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) + << "Can' t parse data file " << input_file; + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + message = new benchmarks::proto3::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + message = new benchmarks::proto2::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + message = new benchmarks::proto2::GoogleMessage2; + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + message = new benchmarks::google_message3::GoogleMessage3; + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + message = new benchmarks::google_message4::GoogleMessage4; + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } + + for (int i = 0; i < dataset.payload_size(); i++) { + message->ParseFromString(dataset.payload(i)); + Proto3DataStripper stripper; + stripper.StripMessage(message); + dataset.set_payload(i, message->SerializeAsString()); + } + + std::ofstream ofs(output_file); + ofs << dataset.SerializeAsString(); + ofs.close(); + } + + + return 0; +} diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc new file mode 100644 index 00000000..9c1b3d04 --- /dev/null +++ b/benchmarks/util/protoc-gen-gogoproto.cc @@ -0,0 +1,103 @@ +#include "google/protobuf/compiler/code_generator.h" +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/io/printer.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "schema_proto2_to_proto3_util.h" + +#include "google/protobuf/compiler/plugin.h" + +using google::protobuf::FileDescriptorProto; +using google::protobuf::FileDescriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::io::Printer; +using google::protobuf::util::SchemaGroupStripper; +using google::protobuf::util::EnumScrubber; + +namespace google { +namespace protobuf { +namespace compiler { + +namespace { + +string StripProto(string filename) { + if (filename.substr(filename.size() - 11) == ".protodevel") { + // .protodevel + return filename.substr(0, filename.size() - 11); + } else { + // .proto + return filename.substr(0, filename.size() - 6); + } +} + +DescriptorPool new_pool_; + +} // namespace + +class GoGoProtoGenerator : public CodeGenerator { + public: + virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files, + const string& parameter, + GeneratorContext* context, + string* error) const { + for (int i = 0; i < files.size(); i++) { + for (auto file : files) { + bool can_generate = + (new_pool_.FindFileByName(file->name()) == nullptr); + for (int j = 0; j < file->dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->public_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->public_dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->weak_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->weak_dependency(j)->name()) != nullptr); + } + if (can_generate) { + Generate(file, parameter, context, error); + break; + } + } + } + + return true; + } + + virtual bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + FileDescriptorProto new_file; + file->CopyTo(&new_file); + SchemaGroupStripper::StripFile(file, &new_file); + + EnumScrubber enum_scrubber; + enum_scrubber.ScrubFile(&new_file); + + string filename = file->name(); + string basename = StripProto(filename); + + std::vector<std::pair<string,string>> option_pairs; + ParseGeneratorParameter(parameter, &option_pairs); + + std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output( + context->Open(basename + ".proto")); + string content = new_pool_.BuildFile(new_file)->DebugString(); + Printer printer(output.get(), '$'); + printer.WriteRaw(content.c_str(), content.size()); + + return true; + } +}; + +} // namespace compiler +} // namespace protobuf +} // namespace google + +int main(int argc, char* argv[]) { + google::protobuf::compiler::GoGoProtoGenerator generator; + return google::protobuf::compiler::PluginMain(argc, argv, &generator); +} diff --git a/benchmarks/util/protoc-gen-proto2_to_proto3.cc b/benchmarks/util/protoc-gen-proto2_to_proto3.cc new file mode 100644 index 00000000..d0a89023 --- /dev/null +++ b/benchmarks/util/protoc-gen-proto2_to_proto3.cc @@ -0,0 +1,115 @@ +#include "google/protobuf/compiler/code_generator.h" +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/io/printer.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "schema_proto2_to_proto3_util.h" + +#include "google/protobuf/compiler/plugin.h" + +using google::protobuf::FileDescriptorProto; +using google::protobuf::FileDescriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::io::Printer; +using google::protobuf::util::SchemaGroupStripper; +using google::protobuf::util::EnumScrubber; +using google::protobuf::util::ExtensionStripper; +using google::protobuf::util::FieldScrubber; + +namespace google { +namespace protobuf { +namespace compiler { + +namespace { + +string StripProto(string filename) { + return filename.substr(0, filename.rfind(".proto")); +} + +DescriptorPool* GetPool() { + static DescriptorPool *pool = new DescriptorPool(); + return pool; +} + +} // namespace + +class Proto2ToProto3Generator final : public CodeGenerator { + public: + bool GenerateAll(const std::vector<const FileDescriptor*>& files, + const string& parameter, + GeneratorContext* context, + string* error) const { + for (int i = 0; i < files.size(); i++) { + for (auto file : files) { + if (CanGenerate(file)) { + Generate(file, parameter, context, error); + break; + } + } + } + + return true; + } + + bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + FileDescriptorProto new_file; + file->CopyTo(&new_file); + SchemaGroupStripper::StripFile(file, &new_file); + + EnumScrubber enum_scrubber; + enum_scrubber.ScrubFile(&new_file); + ExtensionStripper::StripFile(&new_file); + FieldScrubber::ScrubFile(&new_file); + new_file.set_syntax("proto3"); + + string filename = file->name(); + string basename = StripProto(filename); + + std::vector<std::pair<string,string>> option_pairs; + ParseGeneratorParameter(parameter, &option_pairs); + + std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output( + context->Open(basename + ".proto")); + string content = GetPool()->BuildFile(new_file)->DebugString(); + Printer printer(output.get(), '$'); + printer.WriteRaw(content.c_str(), content.size()); + + return true; + } + private: + bool CanGenerate(const FileDescriptor* file) const { + if (GetPool()->FindFileByName(file->name()) != nullptr) { + return false; + } + for (int j = 0; j < file->dependency_count(); j++) { + if (GetPool()->FindFileByName(file->dependency(j)->name()) == nullptr) { + return false; + } + } + for (int j = 0; j < file->public_dependency_count(); j++) { + if (GetPool()->FindFileByName( + file->public_dependency(j)->name()) == nullptr) { + return false; + } + } + for (int j = 0; j < file->weak_dependency_count(); j++) { + if (GetPool()->FindFileByName( + file->weak_dependency(j)->name()) == nullptr) { + return false; + } + } + return true; + } +}; + +} // namespace compiler +} // namespace protobuf +} // namespace google + +int main(int argc, char* argv[]) { + google::protobuf::compiler::Proto2ToProto3Generator generator; + return google::protobuf::compiler::PluginMain(argc, argv, &generator); +} diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py new file mode 100755 index 00000000..43c9fa2d --- /dev/null +++ b/benchmarks/util/run_and_upload.py @@ -0,0 +1,292 @@ +from __future__ import print_function +from __future__ import absolute_import +import argparse +import os +import re +import copy +import uuid +import calendar +import time +from . import big_query_utils +import datetime +import json +# This import depends on the automake rule protoc_middleman, please make sure +# protoc_middleman has been built before run this file. +import os.path, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +import tmp.benchmarks_pb2 as benchmarks_pb2 +from click.types import STRING + +_PROJECT_ID = 'grpc-testing' +_DATASET = 'protobuf_benchmark_result' +_TABLE = 'opensource_result_v1' +_NOW = "%d%02d%02d" % (datetime.datetime.now().year, + datetime.datetime.now().month, + datetime.datetime.now().day) + +file_size_map = {} + +def get_data_size(file_name): + if file_name in file_size_map: + return file_size_map[file_name] + benchmark_dataset = benchmarks_pb2.BenchmarkDataset() + benchmark_dataset.ParseFromString( + open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read()) + size = 0 + count = 0 + for payload in benchmark_dataset.payload: + size += len(payload) + count += 1 + file_size_map[file_name] = (size, 1.0 * size / count) + return size, 1.0 * size / count + + +def extract_file_name(file_name): + name_list = re.split("[/\.]", file_name) + short_file_name = "" + for name in name_list: + if name[:14] == "google_message": + short_file_name = name + return short_file_name + + +cpp_result = [] +python_result = [] +java_result = [] +go_result = [] + + +# CPP results example: +# [ +# "benchmarks": [ +# { +# "bytes_per_second": int, +# "cpu_time": int, +# "name: string, +# "time_unit: string, +# ... +# }, +# ... +# ], +# ... +# ] +def parse_cpp_result(filename): + global cpp_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results = json.loads(f.read()) + for benchmark in results["benchmarks"]: + data_filename = "".join( + re.split("(_parse_|_serialize)", benchmark["name"])[0]) + behavior = benchmark["name"][len(data_filename) + 1:] + cpp_result.append({ + "language": "cpp", + "dataFileName": data_filename, + "behavior": behavior, + "throughput": benchmark["bytes_per_second"] / 2.0 ** 20 + }) + + +# Python results example: +# [ +# [ +# { +# "filename": string, +# "benchmarks": { +# behavior: results, +# ... +# }, +# "message_name": STRING +# }, +# ... +# ], #pure-python +# ... +# ] +def parse_python_result(filename): + global python_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results_list = json.loads(f.read()) + for results in results_list: + for result in results: + _, avg_size = get_data_size(result["filename"]) + for behavior in result["benchmarks"]: + python_result.append({ + "language": "python", + "dataFileName": extract_file_name(result["filename"]), + "behavior": behavior, + "throughput": avg_size / + result["benchmarks"][behavior] * 1e9 / 2 ** 20 + }) + + +# Java results example: +# [ +# { +# "id": string, +# "instrumentSpec": {...}, +# "measurements": [ +# { +# "weight": float, +# "value": { +# "magnitude": float, +# "unit": string +# }, +# ... +# }, +# ... +# ], +# "run": {...}, +# "scenario": { +# "benchmarkSpec": { +# "methodName": string, +# "parameters": { +# defined parameters in the benchmark: parameters value +# }, +# ... +# }, +# ... +# } +# +# }, +# ... +# ] +def parse_java_result(filename): + global average_bytes_per_message, java_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + results = json.loads(f.read()) + for result in results: + total_weight = 0 + total_value = 0 + for measurement in result["measurements"]: + total_weight += measurement["weight"] + total_value += measurement["value"]["magnitude"] + avg_time = total_value * 1.0 / total_weight + total_size, _ = get_data_size( + result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) + java_result.append({ + "language": "java", + "throughput": total_size / avg_time * 1e9 / 2 ** 20, + "behavior": result["scenario"]["benchmarkSpec"]["methodName"], + "dataFileName": extract_file_name( + result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) + }) + + +# Go benchmark results: +# +# goos: linux +# goarch: amd64 +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op +# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op +# PASS +# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s +def parse_go_result(filename): + global go_result + if filename == "": + return + if filename[0] != '/': + filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename + with open(filename) as f: + for line in f: + result_list = re.split("[\ \t]+", line) + if result_list[0][:9] != "Benchmark": + continue + first_slash_index = result_list[0].find('/') + last_slash_index = result_list[0].rfind('/') + full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix + total_bytes, _ = get_data_size(full_filename) + behavior_with_suffix = result_list[0][last_slash_index+1:] + last_dash = behavior_with_suffix.rfind("-") + if last_dash == -1: + behavior = behavior_with_suffix + else: + behavior = behavior_with_suffix[:last_dash] + go_result.append({ + "dataFilename": extract_file_name(full_filename), + "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20, + "behavior": behavior, + "language": "go" + }) + + +def get_metadata(): + build_number = os.getenv('BUILD_NUMBER') + build_url = os.getenv('BUILD_URL') + job_name = os.getenv('JOB_NAME') + git_commit = os.getenv('GIT_COMMIT') + # actual commit is the actual head of PR that is getting tested + git_actual_commit = os.getenv('ghprbActualCommit') + + utc_timestamp = str(calendar.timegm(time.gmtime())) + metadata = {'created': utc_timestamp} + + if build_number: + metadata['buildNumber'] = build_number + if build_url: + metadata['buildUrl'] = build_url + if job_name: + metadata['jobName'] = job_name + if git_commit: + metadata['gitCommit'] = git_commit + if git_actual_commit: + metadata['gitActualCommit'] = git_actual_commit + + return metadata + + +def upload_result(result_list, metadata): + for result in result_list: + new_result = copy.deepcopy(result) + new_result['metadata'] = metadata + bq = big_query_utils.create_big_query() + row = big_query_utils.make_row(str(uuid.uuid4()), new_result) + if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET, + _TABLE + "$" + _NOW, + [row]): + print('Error when uploading result', new_result) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-cpp", "--cpp_input_file", + help="The CPP benchmark result file's name", + default="") + parser.add_argument("-java", "--java_input_file", + help="The Java benchmark result file's name", + default="") + parser.add_argument("-python", "--python_input_file", + help="The Python benchmark result file's name", + default="") + parser.add_argument("-go", "--go_input_file", + help="The golang benchmark result file's name", + default="") + args = parser.parse_args() + + parse_cpp_result(args.cpp_input_file) + parse_python_result(args.python_input_file) + parse_java_result(args.java_input_file) + parse_go_result(args.go_input_file) + + metadata = get_metadata() + print("uploading cpp results...") + upload_result(cpp_result, metadata) + print("uploading java results...") + upload_result(java_result, metadata) + print("uploading python results...") + upload_result(python_result, metadata) + print("uploading go results...") + upload_result(go_result, metadata) diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h new file mode 100644 index 00000000..0079f6f1 --- /dev/null +++ b/benchmarks/util/schema_proto2_to_proto3_util.h @@ -0,0 +1,194 @@ +#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_ +#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_ + +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" + +#include <sstream> +#include <algorithm> + +using google::protobuf::Descriptor; +using google::protobuf::DescriptorProto; +using google::protobuf::FileDescriptorProto; +using google::protobuf::FieldDescriptorProto; +using google::protobuf::Message; +using google::protobuf::EnumValueDescriptorProto; + +namespace google { +namespace protobuf { +namespace util { + +class SchemaGroupStripper { + + public: + static void StripFile(const FileDescriptor* old_file, + FileDescriptorProto *file) { + for (int i = file->mutable_message_type()->size() - 1; i >= 0; i--) { + if (IsMessageSet(old_file->message_type(i))) { + file->mutable_message_type()->DeleteSubrange(i, 1); + continue; + } + StripMessage(old_file->message_type(i), file->mutable_message_type(i)); + } + for (int i = file->mutable_extension()->size() - 1; i >= 0; i--) { + auto field = old_file->extension(i); + if (field->type() == FieldDescriptor::TYPE_GROUP || + IsMessageSet(field->message_type()) || + IsMessageSet(field->containing_type())) { + file->mutable_extension()->DeleteSubrange(i, 1); + } + } + } + + private: + static bool IsMessageSet(const Descriptor *descriptor) { + if (descriptor != nullptr + && descriptor->options().message_set_wire_format()) { + return true; + } + return false; + } + + static void StripMessage(const Descriptor *old_message, + DescriptorProto *new_message) { + for (int i = new_message->mutable_field()->size() - 1; i >= 0; i--) { + if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP || + IsMessageSet(old_message->field(i)->message_type())) { + new_message->mutable_field()->DeleteSubrange(i, 1); + } + } + for (int i = new_message->mutable_extension()->size() - 1; i >= 0; i--) { + auto field_type_name = new_message->mutable_extension(i)->type_name(); + if (old_message->extension(i)->type() == FieldDescriptor::TYPE_GROUP || + IsMessageSet(old_message->extension(i)->containing_type()) || + IsMessageSet(old_message->extension(i)->message_type())) { + new_message->mutable_extension()->DeleteSubrange(i, 1); + } + } + for (int i = 0; i < new_message->mutable_nested_type()->size(); i++) { + StripMessage(old_message->nested_type(i), + new_message->mutable_nested_type(i)); + } + } + +}; + +class EnumScrubber { + + public: + EnumScrubber() + : total_added_(0) { + } + + void ScrubFile(FileDescriptorProto *file) { + for (int i = 0; i < file->enum_type_size(); i++) { + ScrubEnum(file->mutable_enum_type(i)); + } + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + ScrubMessage(file->mutable_message_type(i)); + } + } + + private: + void ScrubEnum(EnumDescriptorProto *enum_type) { + if (enum_type->value(0).number() != 0) { + bool has_zero = false; + for (int j = 0; j < enum_type->value().size(); j++) { + if (enum_type->value(j).number() == 0) { + EnumValueDescriptorProto temp_enum_value; + temp_enum_value.CopyFrom(enum_type->value(j)); + enum_type->mutable_value(j)->CopyFrom(enum_type->value(0)); + enum_type->mutable_value(0)->CopyFrom(temp_enum_value); + has_zero = true; + break; + } + } + if (!has_zero) { + enum_type->mutable_value()->Add(); + for (int i = enum_type->mutable_value()->size() - 1; i > 0; i--) { + enum_type->mutable_value(i)->CopyFrom( + *enum_type->mutable_value(i - 1)); + } + enum_type->mutable_value(0)->set_number(0); + enum_type->mutable_value(0)->set_name("ADDED_ZERO_VALUE_" + + std::to_string(total_added_++)); + } + } + + } + + void ScrubMessage(DescriptorProto *message_type) { + for (int i = 0; i < message_type->mutable_enum_type()->size(); i++) { + ScrubEnum(message_type->mutable_enum_type(i)); + } + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + ScrubMessage(message_type->mutable_nested_type(i)); + } + } + + int total_added_; +}; + +class ExtensionStripper { + public: + static void StripFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + StripMessage(file->mutable_message_type(i)); + } + file->mutable_extension()->Clear(); + } + private: + static void StripMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + message_type->clear_extension_range(); + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + StripMessage(message_type->mutable_nested_type(i)); + } + } +}; + + +class FieldScrubber { + public: + static void ScrubFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + ScrubMessage(file->mutable_message_type(i)); + } + for (int i = 0; i < file->mutable_extension()->size(); i++) { + file->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(file->mutable_extension(i))) { + file->mutable_extension(i)->clear_label(); + } + } + } + private: + static bool ShouldClearLabel(const FieldDescriptorProto *field) { + return field->label() == FieldDescriptorProto::LABEL_REQUIRED; + } + + static void ScrubMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + for (int i = 0; i < message_type->mutable_extension()->size(); i++) { + message_type->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_extension(i))) { + message_type->mutable_extension(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_field()->size(); i++) { + message_type->mutable_field(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_field(i))) { + message_type->mutable_field(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + ScrubMessage(message_type->mutable_nested_type(i)); + } + } +}; + +} // namespace util +} // namespace protobuf +} // namespace google + +#endif // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_ |