9 files changed, 1105 insertions, 0 deletions
diff --git a/benchmarks/util/__init__.py b/benchmarks/util/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/benchmarks/util/__init__.py
diff --git a/benchmarks/util/big_query_utils.py b/benchmarks/util/big_query_utils.py
new file mode 100755
index 00000000..aea55bbd
--- /dev/null
+++ b/benchmarks/util/big_query_utils.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python2.7
+
+from __future__ import print_function
+import argparse
+import json
+import uuid
+import httplib2
+
+from apiclient import discovery
+from apiclient.errors import HttpError
+from oauth2client.client import GoogleCredentials
+
+# 30 days in milliseconds
+_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
+NUM_RETRIES = 3
+
+
+def create_big_query():
+    """Authenticates with cloud platform and gets a BiqQuery service object
+  """
+    creds = GoogleCredentials.get_application_default()
+    return discovery.build(
+        'bigquery', 'v2', credentials=creds, cache_discovery=False)
+
+
+def create_dataset(biq_query, project_id, dataset_id):
+    is_success = True
+    body = {
+        'datasetReference': {
+            'projectId': project_id,
+            'datasetId': dataset_id
+        }
+    }
+
+    try:
+        dataset_req = biq_query.datasets().insert(
+            projectId=project_id, body=body)
+        dataset_req.execute(num_retries=NUM_RETRIES)
+    except HttpError as http_error:
+        if http_error.resp.status == 409:
+            print('Warning: The dataset %s already exists' % dataset_id)
+        else:
+            # Note: For more debugging info, print "http_error.content"
+            print('Error in creating dataset: %s. Err: %s' % (dataset_id,
+                                                              http_error))
+            is_success = False
+    return is_success
+
+
+def create_table(big_query, project_id, dataset_id, table_id, table_schema,
+                 description):
+    fields = [{
+        'name': field_name,
+        'type': field_type,
+        'description': field_description
+    } for (field_name, field_type, field_description) in table_schema]
+    return create_table2(big_query, project_id, dataset_id, table_id, fields,
+                         description)
+
+
+def create_partitioned_table(big_query,
+                             project_id,
+                             dataset_id,
+                             table_id,
+                             table_schema,
+                             description,
+                             partition_type='DAY',
+                             expiration_ms=_EXPIRATION_MS):
+    """Creates a partitioned table. By default, a date-paritioned table is created with
+  each partition lasting 30 days after it was last modified.
+  """
+    fields = [{
+        'name': field_name,
+        'type': field_type,
+        'description': field_description
+    } for (field_name, field_type, field_description) in table_schema]
+    return create_table2(big_query, project_id, dataset_id, table_id, fields,
+                         description, partition_type, expiration_ms)
+
+
+def create_table2(big_query,
+                  project_id,
+                  dataset_id,
+                  table_id,
+                  fields_schema,
+                  description,
+                  partition_type=None,
+                  expiration_ms=None):
+    is_success = True
+
+    body = {
+        'description': description,
+        'schema': {
+            'fields': fields_schema
+        },
+        'tableReference': {
+            'datasetId': dataset_id,
+            'projectId': project_id,
+            'tableId': table_id
+        }
+    }
+
+    if partition_type and expiration_ms:
+        body["timePartitioning"] = {
+            "type": partition_type,
+            "expirationMs": expiration_ms
+        }
+
+    try:
+        table_req = big_query.tables().insert(
+            projectId=project_id, datasetId=dataset_id, body=body)
+        res = table_req.execute(num_retries=NUM_RETRIES)
+        print('Successfully created %s "%s"' % (res['kind'], res['id']))
+    except HttpError as http_error:
+        if http_error.resp.status == 409:
+            print('Warning: Table %s already exists' % table_id)
+        else:
+            print('Error in creating table: %s. Err: %s' % (table_id,
+                                                            http_error))
+            is_success = False
+    return is_success
+
+
+def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
+    is_success = True
+
+    body = {
+        'schema': {
+            'fields': fields_schema
+        },
+        'tableReference': {
+            'datasetId': dataset_id,
+            'projectId': project_id,
+            'tableId': table_id
+        }
+    }
+
+    try:
+        table_req = big_query.tables().patch(
+            projectId=project_id,
+            datasetId=dataset_id,
+            tableId=table_id,
+            body=body)
+        res = table_req.execute(num_retries=NUM_RETRIES)
+        print('Successfully patched %s "%s"' % (res['kind'], res['id']))
+    except HttpError as http_error:
+        print('Error in creating table: %s. Err: %s' % (table_id, http_error))
+        is_success = False
+    return is_success
+
+
+def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
+    is_success = True
+    body = {'rows': rows_list}
+    try:
+        insert_req = big_query.tabledata().insertAll(
+            projectId=project_id,
+            datasetId=dataset_id,
+            tableId=table_id,
+            body=body)
+        res = insert_req.execute(num_retries=NUM_RETRIES)
+        if res.get('insertErrors', None):
+            print('Error inserting rows! Response: %s' % res)
+            is_success = False
+    except HttpError as http_error:
+        print('Error inserting rows to the table %s' % table_id)
+        is_success = False
+
+    return is_success
+
+
+def sync_query_job(big_query, project_id, query, timeout=5000):
+    query_data = {'query': query, 'timeoutMs': timeout}
+    query_job = None
+    try:
+        query_job = big_query.jobs().query(
+            projectId=project_id,
+            body=query_data).execute(num_retries=NUM_RETRIES)
+    except HttpError as http_error:
+        print('Query execute job failed with error: %s' % http_error)
+        print(http_error.content)
+    return query_job
+
+
+# Note: the table_schema arguments above are lists of
+# (column name, column type, description) tuples.
+def make_row(unique_row_id, row_values_dict):
+    """row_values_dict is a dictionary of column name and column value.
+  """
+    return {'insertId': unique_row_id, 'json': row_values_dict}
diff --git a/benchmarks/util/data_proto2_to_proto3_util.h b/benchmarks/util/data_proto2_to_proto3_util.h
new file mode 100644
index 00000000..5eea8509
--- /dev/null
+++ b/benchmarks/util/data_proto2_to_proto3_util.h
@@ -0,0 +1,64 @@
+#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
+#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
+
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.h"
+
+using google::protobuf::FieldDescriptor;
+using google::protobuf::Message;
+using google::protobuf::Reflection;
+
+namespace google {
+namespace protobuf {
+namespace util {
+
+// Base class for removing fields from a message tree in place. Subclasses
+// decide which fields are removed by implementing ShouldBeClear().
+class DataStripper {
+ public:
+  // Virtual destructor: this is a polymorphic base class, so destroying a
+  // derived stripper through a DataStripper pointer must be well defined.
+  virtual ~DataStripper() {}
+
+  // Clears every set field of `message` for which ShouldBeClear() is true,
+  // recurses into the remaining message-typed fields (singular and
+  // repeated), and finally drops the message's unknown fields.
+  void StripMessage(Message *message) {
+    std::vector<const FieldDescriptor*> set_fields;
+    const Reflection* reflection = message->GetReflection();
+    reflection->ListFields(*message, &set_fields);
+
+    for (size_t i = 0; i < set_fields.size(); i++) {
+      const FieldDescriptor* field = set_fields[i];
+      if (ShouldBeClear(field)) {
+        reflection->ClearField(message, field);
+        continue;
+      }
+      if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
+        if (field->is_repeated()) {
+          for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
+            StripMessage(reflection->MutableRepeatedMessage(message, field, j));
+          }
+        } else {
+          StripMessage(reflection->MutableMessage(message, field));
+        }
+      }
+    }
+
+    reflection->MutableUnknownFields(message)->Clear();
+  }
+ private:
+  // Returns true if `field` must be cleared from the message.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) = 0;
+};
+
+// Stripper that removes group fields only.
+class GogoDataStripper : public DataStripper {
+ private:
+  // A field is cleared exactly when its type is TYPE_GROUP.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) {
+    const bool is_group = (field->type() == FieldDescriptor::TYPE_GROUP);
+    return is_group;
+  }
+};
+
+// Stripper that removes group fields and extension fields.
+class Proto3DataStripper : public DataStripper {
+ private:
+  // A field is cleared when it is a group or an extension.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) {
+    if (field->type() == FieldDescriptor::TYPE_GROUP) {
+      return true;
+    }
+    return field->is_extension();
+  }
+};
+
+}  // namespace util
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc
new file mode 100644
index 00000000..9ef57b0d
--- /dev/null
+++ b/benchmarks/util/gogo_data_scrubber.cc
@@ -0,0 +1,74 @@
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+#include "data_proto2_to_proto3_util.h"
+
+#include <fstream>
+
+using google::protobuf::util::GogoDataStripper;
+
+// Returns the entire contents of the file `name`. Aborts through
+// GOOGLE_CHECK if the file cannot be opened.
+std::string ReadFile(const std::string& name) {
+  // The datasets are serialized protobufs, i.e. binary data: open in binary
+  // mode so no platform performs newline translation on the bytes.
+  std::ifstream file(name.c_str(), std::ios::binary);
+  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
+      << name
+      << "', please make sure you are running this command from the benchmarks"
+      << " directory.\n";
+  return std::string((std::istreambuf_iterator<char>(file)),
+                     std::istreambuf_iterator<char>());
+}
+
+// Rewrites each input dataset with group fields and unknown fields stripped
+// from every payload. Arguments are N input paths followed by N output paths.
+int main(int argc, char *argv[]) {
+  // argv[0] plus N inputs plus N outputs: argc must be odd and at least 3.
+  if (argc % 2 == 0 || argc == 1) {
+    std::cerr << "Usage: [input_files] [output_file_names] where " <<
+        "input_files are one to one mapping to output_file_names." <<
+        std::endl;
+    return 1;
+  }
+
+  const int pair_count = argc / 2;
+  for (int i = pair_count; i > 0; i--) {
+    // Input i is paired with the argument pair_count positions after it.
+    const std::string input_file = argv[i];
+    const std::string output_file = argv[i + pair_count];
+
+    std::cerr << "Generating " << input_file
+        << " to " << output_file << std::endl;
+    benchmarks::BenchmarkDataset dataset;
+    std::string dataset_payload = ReadFile(input_file);
+    GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
+      << "Can't parse data file " << input_file;
+
+    // Instantiate the concrete message type named by the dataset.
+    Message* message = nullptr;
+    if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
+      message = new benchmarks::proto3::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
+      message = new benchmarks::proto2::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
+      message = new benchmarks::proto2::GoogleMessage2;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message3.GoogleMessage3") {
+      message = new benchmarks::google_message3::GoogleMessage3;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message4.GoogleMessage4") {
+      message = new benchmarks::google_message4::GoogleMessage4;
+    } else {
+      std::cerr << "Unknown message type: " << dataset.message_name()
+          << std::endl;
+      return 1;
+    }
+
+    GogoDataStripper stripper;
+    // `j` rather than `i`: the payload index must not shadow the file index.
+    for (int j = 0; j < dataset.payload_size(); j++) {
+      if (!message->ParseFromString(dataset.payload(j))) {
+        std::cerr << "Warning: payload " << j << " of " << input_file
+            << " did not parse cleanly" << std::endl;
+      }
+      stripper.StripMessage(message);
+      dataset.set_payload(j, message->SerializeAsString());
+    }
+    // The message was allocated with new above; release it per input file.
+    delete message;
+
+    // Serialized protobuf is binary data, so write it in binary mode.
+    std::ofstream ofs(output_file.c_str(), std::ios::binary);
+    GOOGLE_CHECK(ofs.is_open()) << "Couldn't open output file '"
+        << output_file << "'";
+    ofs << dataset.SerializeAsString();
+    ofs.close();
+  }
+
+
+  return 0;
+}
diff --git a/benchmarks/util/proto3_data_stripper.cc b/benchmarks/util/proto3_data_stripper.cc
new file mode 100644
index 00000000..3096c4c1
--- /dev/null
+++ b/benchmarks/util/proto3_data_stripper.cc
@@ -0,0 +1,74 @@
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+#include "data_proto2_to_proto3_util.h"
+
+#include <fstream>
+
+using google::protobuf::util::Proto3DataStripper;
+
+// Returns the entire contents of the file `name`. Aborts through
+// GOOGLE_CHECK if the file cannot be opened.
+std::string ReadFile(const std::string& name) {
+  // The datasets are serialized protobufs, i.e. binary data: open in binary
+  // mode so no platform performs newline translation on the bytes.
+  std::ifstream file(name.c_str(), std::ios::binary);
+  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
+      << name
+      << "', please make sure you are running this command from the benchmarks"
+      << " directory.\n";
+  return std::string((std::istreambuf_iterator<char>(file)),
+                     std::istreambuf_iterator<char>());
+}
+
+// Rewrites each input dataset with group fields, extension fields and unknown
+// fields stripped from every payload. Arguments are N input paths followed by
+// N output paths.
+int main(int argc, char *argv[]) {
+  // argv[0] plus N inputs plus N outputs: argc must be odd and at least 3.
+  if (argc % 2 == 0 || argc == 1) {
+    std::cerr << "Usage: [input_files] [output_file_names] where " <<
+        "input_files are one to one mapping to output_file_names." <<
+        std::endl;
+    return 1;
+  }
+
+  const int pair_count = argc / 2;
+  for (int i = pair_count; i > 0; i--) {
+    // Input i is paired with the argument pair_count positions after it.
+    const std::string input_file = argv[i];
+    const std::string output_file = argv[i + pair_count];
+
+    std::cerr << "Generating " << input_file
+        << " to " << output_file << std::endl;
+    benchmarks::BenchmarkDataset dataset;
+    std::string dataset_payload = ReadFile(input_file);
+    GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
+      << "Can't parse data file " << input_file;
+
+    // Instantiate the concrete message type named by the dataset.
+    Message* message = nullptr;
+    if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
+      message = new benchmarks::proto3::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
+      message = new benchmarks::proto2::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
+      message = new benchmarks::proto2::GoogleMessage2;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message3.GoogleMessage3") {
+      message = new benchmarks::google_message3::GoogleMessage3;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message4.GoogleMessage4") {
+      message = new benchmarks::google_message4::GoogleMessage4;
+    } else {
+      std::cerr << "Unknown message type: " << dataset.message_name()
+          << std::endl;
+      return 1;
+    }
+
+    Proto3DataStripper stripper;
+    // `j` rather than `i`: the payload index must not shadow the file index.
+    for (int j = 0; j < dataset.payload_size(); j++) {
+      if (!message->ParseFromString(dataset.payload(j))) {
+        std::cerr << "Warning: payload " << j << " of " << input_file
+            << " did not parse cleanly" << std::endl;
+      }
+      stripper.StripMessage(message);
+      dataset.set_payload(j, message->SerializeAsString());
+    }
+    // The message was allocated with new above; release it per input file.
+    delete message;
+
+    // Serialized protobuf is binary data, so write it in binary mode.
+    std::ofstream ofs(output_file.c_str(), std::ios::binary);
+    GOOGLE_CHECK(ofs.is_open()) << "Couldn't open output file '"
+        << output_file << "'";
+    ofs << dataset.SerializeAsString();
+    ofs.close();
+  }
+
+
+  return 0;
+}
diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc
new file mode 100644
index 00000000..9c1b3d04
--- /dev/null
+++ b/benchmarks/util/protoc-gen-gogoproto.cc
@@ -0,0 +1,103 @@
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/printer.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "schema_proto2_to_proto3_util.h"
+
+#include "google/protobuf/compiler/plugin.h"
+
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FileDescriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::io::Printer;
+using google::protobuf::util::SchemaGroupStripper;
+using google::protobuf::util::EnumScrubber;
+
+namespace google {
+namespace protobuf {
+namespace compiler {
+
+namespace {
+
+// Returns true if `text` ends with `suffix`.
+bool HasSuffix(const string& text, const string& suffix) {
+  return text.size() >= suffix.size() &&
+         text.compare(text.size() - suffix.size(), suffix.size(),
+                      suffix) == 0;
+}
+
+// Returns `filename` without a trailing ".protodevel" or ".proto". A name
+// with neither suffix is returned unchanged. The length is checked before
+// any offset is computed, so names shorter than a suffix (e.g. "a.proto"
+// against ".protodevel") no longer underflow and make substr() throw.
+string StripProto(string filename) {
+  const string kProtoDevel = ".protodevel";
+  const string kProto = ".proto";
+  if (HasSuffix(filename, kProtoDevel)) {
+    return filename.substr(0, filename.size() - kProtoDevel.size());
+  }
+  if (HasSuffix(filename, kProto)) {
+    return filename.substr(0, filename.size() - kProto.size());
+  }
+  return filename;
+}
+
+// Pool that receives the rewritten descriptors; Generate() builds into it and
+// GenerateAll() queries it to decide which files are ready.
+DescriptorPool new_pool_;
+
+}  // namespace
+
+// protoc plugin generator that re-emits each .proto file after
+// SchemaGroupStripper and EnumScrubber have been applied to it.
+class GoGoProtoGenerator : public CodeGenerator {
+ public:
+  // Generates all of `files`. A file is only generated once every one of its
+  // dependencies is already in new_pool_, so each outer pass emits at most
+  // one ready file. Returns false as soon as one file fails to generate.
+  virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files,
+                           const string& parameter,
+                           GeneratorContext* context,
+                           string* error) const {
+    for (size_t i = 0; i < files.size(); i++) {
+      for (auto file : files) {
+        if (!CanGenerate(file)) {
+          continue;
+        }
+        if (!Generate(file, parameter, context, error)) {
+          return false;
+        }
+        break;
+      }
+    }
+
+    return true;
+  }
+
+  // Rewrites one file and writes its DebugString() to "<basename>.proto".
+  // Returns false and fills `error` if the rewritten descriptor cannot be
+  // built into new_pool_.
+  virtual bool Generate(const FileDescriptor* file,
+                        const string& parameter,
+                        GeneratorContext* context,
+                        string* error) const {
+    FileDescriptorProto new_file;
+    file->CopyTo(&new_file);
+    SchemaGroupStripper::StripFile(file, &new_file);
+
+    EnumScrubber enum_scrubber;
+    enum_scrubber.ScrubFile(&new_file);
+
+    // BuildFile() can fail; check its result instead of dereferencing it
+    // unconditionally.
+    const FileDescriptor* rebuilt = new_pool_.BuildFile(new_file);
+    if (rebuilt == nullptr) {
+      if (error != nullptr) {
+        *error = "Failed to build rewritten descriptor for " + file->name();
+      }
+      return false;
+    }
+
+    string filename = file->name();
+    string basename = StripProto(filename);
+
+    std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
+        context->Open(basename + ".proto"));
+    string content = rebuilt->DebugString();
+    Printer printer(output.get(), '$');
+    printer.WriteRaw(content.c_str(), content.size());
+
+    return true;
+  }
+
+ private:
+  // True if `file` is not yet in new_pool_ while all of its dependencies
+  // (regular, public and weak) already are.
+  bool CanGenerate(const FileDescriptor* file) const {
+    if (new_pool_.FindFileByName(file->name()) != nullptr) {
+      return false;
+    }
+    for (int j = 0; j < file->dependency_count(); j++) {
+      if (new_pool_.FindFileByName(file->dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->public_dependency_count(); j++) {
+      if (new_pool_.FindFileByName(
+          file->public_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->weak_dependency_count(); j++) {
+      if (new_pool_.FindFileByName(
+          file->weak_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+}  // namespace compiler
+}  // namespace protobuf
+}  // namespace google
+
+// Plugin entry point: hands a GoGoProtoGenerator to PluginMain().
+int main(int argc, char* argv[]) {
+  using google::protobuf::compiler::GoGoProtoGenerator;
+  using google::protobuf::compiler::PluginMain;
+
+  GoGoProtoGenerator generator;
+  const int exit_code = PluginMain(argc, argv, &generator);
+  return exit_code;
+}
diff --git a/benchmarks/util/protoc-gen-proto2_to_proto3.cc b/benchmarks/util/protoc-gen-proto2_to_proto3.cc
new file mode 100644
index 00000000..d0a89023
--- /dev/null
+++ b/benchmarks/util/protoc-gen-proto2_to_proto3.cc
@@ -0,0 +1,115 @@
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/printer.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "schema_proto2_to_proto3_util.h"
+
+#include "google/protobuf/compiler/plugin.h"
+
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FileDescriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::io::Printer;
+using google::protobuf::util::SchemaGroupStripper;
+using google::protobuf::util::EnumScrubber;
+using google::protobuf::util::ExtensionStripper;
+using google::protobuf::util::FieldScrubber;
+
+namespace google {
+namespace protobuf {
+namespace compiler {
+
+namespace {
+
+// Returns `filename` cut at the last occurrence of ".proto"; when there is no
+// such occurrence the whole name is returned.
+string StripProto(string filename) {
+  const string::size_type suffix_pos = filename.rfind(".proto");
+  return filename.substr(0, suffix_pos);
+}
+
+// Returns the process-wide pool that receives the rewritten descriptors. It
+// is allocated on first use and never deleted.
+DescriptorPool* GetPool() {
+  static DescriptorPool* const shared_pool = new DescriptorPool();
+  return shared_pool;
+}
+
+}  // namespace
+
+// protoc plugin generator that re-emits each .proto file as proto3 after the
+// group, enum, extension and field rewrites have been applied to it.
+class Proto2ToProto3Generator final : public CodeGenerator {
+ public:
+  // Generates all of `files`. A file is only generated once every one of its
+  // dependencies is already in the pool, so each outer pass emits at most one
+  // ready file. Returns false as soon as one file fails to generate.
+  bool GenerateAll(const std::vector<const FileDescriptor*>& files,
+                           const string& parameter,
+                           GeneratorContext* context,
+                           string* error) const {
+    for (size_t i = 0; i < files.size(); i++) {
+      for (auto file : files) {
+        if (!CanGenerate(file)) {
+          continue;
+        }
+        if (!Generate(file, parameter, context, error)) {
+          return false;
+        }
+        break;
+      }
+    }
+
+    return true;
+  }
+
+  // Rewrites one file and writes its DebugString() to "<basename>.proto".
+  // Returns false and fills `error` if the rewritten descriptor cannot be
+  // built into the pool.
+  bool Generate(const FileDescriptor* file,
+                        const string& parameter,
+                        GeneratorContext* context,
+                        string* error) const {
+    FileDescriptorProto new_file;
+    file->CopyTo(&new_file);
+    SchemaGroupStripper::StripFile(file, &new_file);
+
+    EnumScrubber enum_scrubber;
+    enum_scrubber.ScrubFile(&new_file);
+    ExtensionStripper::StripFile(&new_file);
+    FieldScrubber::ScrubFile(&new_file);
+    new_file.set_syntax("proto3");
+
+    // BuildFile() can fail; check its result instead of dereferencing it
+    // unconditionally.
+    const FileDescriptor* rebuilt = GetPool()->BuildFile(new_file);
+    if (rebuilt == nullptr) {
+      if (error != nullptr) {
+        *error = "Failed to build rewritten descriptor for " + file->name();
+      }
+      return false;
+    }
+
+    string filename = file->name();
+    string basename = StripProto(filename);
+
+    std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
+        context->Open(basename + ".proto"));
+    string content = rebuilt->DebugString();
+    Printer printer(output.get(), '$');
+    printer.WriteRaw(content.c_str(), content.size());
+
+    return true;
+  }
+ private:
+  // True if `file` is not yet in the pool while all of its dependencies
+  // (regular, public and weak) already are.
+  bool CanGenerate(const FileDescriptor* file) const {
+    if (GetPool()->FindFileByName(file->name()) != nullptr) {
+      return false;
+    }
+    for (int j = 0; j < file->dependency_count(); j++) {
+      if (GetPool()->FindFileByName(file->dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->public_dependency_count(); j++) {
+      if (GetPool()->FindFileByName(
+          file->public_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->weak_dependency_count(); j++) {
+      if (GetPool()->FindFileByName(
+          file->weak_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+}  // namespace compiler
+}  // namespace protobuf
+}  // namespace google
+
+// Plugin entry point: hands a Proto2ToProto3Generator to PluginMain().
+int main(int argc, char* argv[]) {
+  using google::protobuf::compiler::PluginMain;
+  using google::protobuf::compiler::Proto2ToProto3Generator;
+
+  Proto2ToProto3Generator generator;
+  const int exit_code = PluginMain(argc, argv, &generator);
+  return exit_code;
+}
diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py
new file mode 100755
index 00000000..43c9fa2d
--- /dev/null
+++ b/benchmarks/util/run_and_upload.py
@@ -0,0 +1,292 @@
+from __future__ import print_function
+from __future__ import absolute_import
+import argparse
+import os
+import re
+import copy
+import uuid
+import calendar
+import time
+from . import big_query_utils
+import datetime
+import json
+# This import depends on the automake rule protoc_middleman; please make sure
+# protoc_middleman has been built before running this file.
+import os.path, sys
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import tmp.benchmarks_pb2 as benchmarks_pb2
+from click.types import STRING
+
+_PROJECT_ID = 'grpc-testing'
+_DATASET = 'protobuf_benchmark_result'
+_TABLE = 'opensource_result_v1'
+_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
+                       datetime.datetime.now().month,
+                       datetime.datetime.now().day)
+
+file_size_map = {}
+
+def get_data_size(file_name):
+  if file_name in file_size_map:
+    return file_size_map[file_name]
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(
+      open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read())
+  size = 0
+  count = 0
+  for payload in benchmark_dataset.payload:
+    size += len(payload)
+    count += 1
+  file_size_map[file_name] = (size, 1.0 * size / count)
+  return size, 1.0 * size / count
+
+
+def extract_file_name(file_name):
+  name_list = re.split("[/\.]", file_name)
+  short_file_name = ""
+  for name in name_list:
+    if name[:14] == "google_message":
+      short_file_name = name
+  return short_file_name
+
+
+cpp_result = []
+python_result = []
+java_result = []
+go_result = []
+
+
+# CPP results example:
+# {
+#   "benchmarks": [
+#     {
+#       "bytes_per_second": int,
+#       "cpu_time": int,
+#       "name": string,
+#       "time_unit": string,
+#       ...
+#     },
+#     ...
+#   ],
+#   ...
+# }
+def parse_cpp_result(filename):
+  global cpp_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for benchmark in results["benchmarks"]:
+      data_filename = "".join(
+          re.split("(_parse_|_serialize)", benchmark["name"])[0])
+      behavior = benchmark["name"][len(data_filename) + 1:]
+      cpp_result.append({
+        "language": "cpp",
+        "dataFileName": data_filename,
+        "behavior": behavior,
+        "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
+      })
+
+
+# Python results example:
+# [ 
+#   [ 
+#     {
+#       "filename": string,
+#       "benchmarks": {
+#         behavior: results, 
+#         ...
+#       },
+#       "message_name": STRING
+#     },
+#     ... 
+#   ], #pure-python
+#   ... 
+# ]
+def parse_python_result(filename):
+  global python_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results_list = json.loads(f.read())
+    for results in results_list:
+      for result in results:
+        _, avg_size = get_data_size(result["filename"])
+        for behavior in result["benchmarks"]:
+          python_result.append({
+            "language": "python",
+            "dataFileName": extract_file_name(result["filename"]),
+            "behavior": behavior,
+            "throughput": avg_size /
+                          result["benchmarks"][behavior] * 1e9 / 2 ** 20
+          })
+
+
+# Java results example:
+# [ 
+#   {
+#     "id": string,
+#     "instrumentSpec": {...},
+#     "measurements": [
+#       {
+#         "weight": float,
+#         "value": {
+#           "magnitude": float,
+#           "unit": string
+#         },
+#         ...
+#       },
+#       ...
+#     ],
+#     "run": {...},
+#     "scenario": {
+#       "benchmarkSpec": {
+#         "methodName": string,
+#         "parameters": {
+#            defined parameters in the benchmark: parameters value
+#         },
+#         ...
+#       },
+#       ...
+#     }
+#     
+#   }, 
+#   ... 
+# ]
+def parse_java_result(filename):
+  global average_bytes_per_message, java_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for result in results:
+      total_weight = 0
+      total_value = 0
+      for measurement in result["measurements"]:
+        total_weight += measurement["weight"]
+        total_value += measurement["value"]["magnitude"]
+      avg_time = total_value * 1.0 / total_weight
+      total_size, _ = get_data_size(
+          result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      java_result.append({
+        "language": "java",
+        "throughput": total_size / avg_time * 1e9 / 2 ** 20,
+        "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
+        "dataFileName": extract_file_name(
+            result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      })
+
+
+# Go benchmark results:
+#
+# goos: linux
+# goarch: amd64
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12               3000      705784 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12                 2000      634648 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12                    5000      244174 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12                    300     4120954 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12                    300     4108632 ns/op
+# PASS
+# ok    _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks  124.173s
+def parse_go_result(filename):
+  global go_result
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    for line in f:
+      result_list = re.split("[\ \t]+", line)
+      if result_list[0][:9] != "Benchmark":
+        continue
+      first_slash_index = result_list[0].find('/')
+      last_slash_index = result_list[0].rfind('/')
+      full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix
+      total_bytes, _ = get_data_size(full_filename)
+      behavior_with_suffix = result_list[0][last_slash_index+1:]
+      last_dash = behavior_with_suffix.rfind("-")
+      if last_dash == -1:
+        behavior = behavior_with_suffix
+      else:
+        behavior = behavior_with_suffix[:last_dash]
+      go_result.append({
+        "dataFilename": extract_file_name(full_filename),
+        "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
+        "behavior": behavior,
+        "language": "go"
+      })
+
+
+def get_metadata():
+  build_number = os.getenv('BUILD_NUMBER')
+  build_url = os.getenv('BUILD_URL')
+  job_name = os.getenv('JOB_NAME')
+  git_commit = os.getenv('GIT_COMMIT')
+  # actual commit is the actual head of PR that is getting tested
+  git_actual_commit = os.getenv('ghprbActualCommit')
+
+  utc_timestamp = str(calendar.timegm(time.gmtime()))
+  metadata = {'created': utc_timestamp}
+
+  if build_number:
+    metadata['buildNumber'] = build_number
+  if build_url:
+    metadata['buildUrl'] = build_url
+  if job_name:
+    metadata['jobName'] = job_name
+  if git_commit:
+    metadata['gitCommit'] = git_commit
+  if git_actual_commit:
+    metadata['gitActualCommit'] = git_actual_commit
+
+  return metadata
+
+
+def upload_result(result_list, metadata):
+  for result in result_list:
+    new_result = copy.deepcopy(result)
+    new_result['metadata'] = metadata
+    bq = big_query_utils.create_big_query()
+    row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
+    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
+                                       _TABLE + "$" + _NOW,
+                                       [row]):
+      print('Error when uploading result', new_result)
+
+
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser()
+  parser.add_argument("-cpp", "--cpp_input_file",
+                      help="The CPP benchmark result file's name",
+                      default="")
+  parser.add_argument("-java", "--java_input_file",
+                      help="The Java benchmark result file's name",
+                      default="")
+  parser.add_argument("-python", "--python_input_file",
+                      help="The Python benchmark result file's name",
+                      default="")
+  parser.add_argument("-go", "--go_input_file",
+                      help="The golang benchmark result file's name",
+                      default="")
+  args = parser.parse_args()
+
+  parse_cpp_result(args.cpp_input_file)
+  parse_python_result(args.python_input_file)
+  parse_java_result(args.java_input_file)
+  parse_go_result(args.go_input_file)
+
+  metadata = get_metadata()
+  print("uploading cpp results...")
+  upload_result(cpp_result, metadata)
+  print("uploading java results...")
+  upload_result(java_result, metadata)
+  print("uploading python results...")
+  upload_result(python_result, metadata)
+  print("uploading go results...")
+  upload_result(go_result, metadata)
diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h
new file mode 100644
index 00000000..0079f6f1
--- /dev/null
+++ b/benchmarks/util/schema_proto2_to_proto3_util.h
@@ -0,0 +1,194 @@
+#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
+#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
+
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+
+#include <sstream>
+#include <algorithm>
+
+using google::protobuf::Descriptor;
+using google::protobuf::DescriptorProto;
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FieldDescriptorProto;
+using google::protobuf::Message;
+using google::protobuf::EnumValueDescriptorProto;
+
+namespace google {
+namespace protobuf {
+namespace util {
+
+// Strips groups and MessageSet-related declarations from a file descriptor.
+class SchemaGroupStripper {
+ public:
+  // Strips `file` in place, indexing `old_file` in lockstep for field types;
+  // `file` is assumed to mirror `old_file` exactly -- TODO confirm callers.
+  static void StripFile(const FileDescriptor* old_file,
+                        FileDescriptorProto *file) {
+    // Iterate backwards so a deletion never shifts an index not yet visited.
+    for (int i = file->message_type_size() - 1; i >= 0; i--) {
+      if (IsMessageSet(old_file->message_type(i))) {
+        file->mutable_message_type()->DeleteSubrange(i, 1);
+        continue;
+      }
+      StripMessage(old_file->message_type(i), file->mutable_message_type(i));
+    }
+    for (int i = file->extension_size() - 1; i >= 0; i--) {
+      if (IsStrippedExtension(old_file->extension(i))) {
+        file->mutable_extension()->DeleteSubrange(i, 1);
+      }
+    }
+  }
+
+ private:
+  // True if `descriptor` is non-null and uses the MessageSet wire format.
+  static bool IsMessageSet(const Descriptor *descriptor) {
+    return descriptor != nullptr &&
+           descriptor->options().message_set_wire_format();
+  }
+  // True if `field` is a group or has a MessageSet message/containing type.
+  static bool IsStrippedExtension(const FieldDescriptor *field) {
+    return field->type() == FieldDescriptor::TYPE_GROUP ||
+           IsMessageSet(field->message_type()) ||
+           IsMessageSet(field->containing_type());
+  }
+  // Strips `new_message` and its nested types; no nested type is deleted.
+  static void StripMessage(const Descriptor *old_message,
+                           DescriptorProto *new_message) {
+    for (int i = new_message->field_size() - 1; i >= 0; i--) {
+      if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP ||
+          IsMessageSet(old_message->field(i)->message_type())) {
+        new_message->mutable_field()->DeleteSubrange(i, 1);
+      }
+    }
+    for (int i = new_message->extension_size() - 1; i >= 0; i--) {
+      if (IsStrippedExtension(old_message->extension(i))) {
+        new_message->mutable_extension()->DeleteSubrange(i, 1);
+      }
+    }
+    for (int i = 0; i < new_message->nested_type_size(); i++) {
+      StripMessage(old_message->nested_type(i),
+                   new_message->mutable_nested_type(i));
+    }
+  }
+};
+
+// Makes every enum in a file start with a zero-numbered value (proto3 rule).
+class EnumScrubber {
+ public:
+  EnumScrubber() : total_added_(0) {}
+
+  // Scrubs every enum in `file`: file-level ones and those in any message.
+  void ScrubFile(FileDescriptorProto *file) {
+    for (int i = 0; i < file->enum_type_size(); i++) {
+      ScrubEnum(file->mutable_enum_type(i));
+    }
+    for (int i = 0; i < file->message_type_size(); i++) {
+      ScrubMessage(file->mutable_message_type(i));
+    }
+  }
+
+ private:
+  // Ensures value(0) of `enum_type` has number 0. If a later value is already
+  // 0 it trades places with the first value; otherwise a placeholder named
+  // ADDED_ZERO_VALUE_<n> is inserted in front of the existing values.
+  void ScrubEnum(EnumDescriptorProto *enum_type) {
+    const int value_count = enum_type->value_size();
+    // An enum without values has no value(0) to read; it gets a placeholder
+    // like any other enum that lacks a zero.
+    if (value_count > 0 && enum_type->value(0).number() == 0) return;
+    for (int j = 1; j < value_count; j++) {
+      if (enum_type->value(j).number() == 0) {
+        enum_type->mutable_value()->SwapElements(0, j);
+        return;
+      }
+    }
+    // Append an empty value and bubble it to the front. Swapping, rather than
+    // copying each value one slot up, leaves slot 0 truly empty, so the
+    // placeholder does not inherit the options of the old first value.
+    enum_type->add_value();
+    for (int i = value_count; i > 0; i--) {
+      enum_type->mutable_value()->SwapElements(i, i - 1);
+    }
+    EnumValueDescriptorProto *zero = enum_type->mutable_value(0);
+    zero->set_number(0);
+    zero->set_name("ADDED_ZERO_VALUE_" + std::to_string(total_added_++));
+  }
+
+  // Scrubs the enums of `message_type`, then recurses into its nested types.
+  void ScrubMessage(DescriptorProto *message_type) {
+    for (int i = 0; i < message_type->enum_type_size(); i++) {
+      ScrubEnum(message_type->mutable_enum_type(i));
+    }
+    for (int i = 0; i < message_type->nested_type_size(); i++) {
+      ScrubMessage(message_type->mutable_nested_type(i));
+    }
+  }
+
+  // Count of placeholders added so far; supplies the <n> in their names.
+  int total_added_;
+};
+
+// Deletes every extension declaration and extension range, at file scope and
+// inside all (nested) messages, from a FileDescriptorProto.
+class ExtensionStripper {
+ public:
+  // Strips `file` in place.
+  static void StripFile(FileDescriptorProto *file) {
+    file->clear_extension();
+    for (auto &msg : *file->mutable_message_type()) StripMessage(&msg);
+  }
+ private:
+  // Strips `message_type`, then recurses into each of its nested types.
+  static void StripMessage(DescriptorProto *message_type) {
+    message_type->clear_extension_range();
+    message_type->clear_extension();
+    for (auto &msg : *message_type->mutable_nested_type()) StripMessage(&msg);
+  }
+};
+
+
+// Removes default values and `required` labels from field declarations.
+class FieldScrubber {
+ public:
+  // Scrubs the fields of all messages in `file` and its file-level extensions.
+  static void ScrubFile(FileDescriptorProto *file) {
+    for (int i = 0; i < file->message_type_size(); i++) {
+      ScrubMessage(file->mutable_message_type(i));
+    }
+    for (int i = 0; i < file->extension_size(); i++) {
+      ScrubField(file->mutable_extension(i));
+    }
+  }
+ private:
+  static bool ShouldClearLabel(const FieldDescriptorProto *field) {
+    return field->label() == FieldDescriptorProto::LABEL_REQUIRED;
+  }
+
+  // Drops `field`'s default value and, if the field is required, its label.
+  static void ScrubField(FieldDescriptorProto *field) {
+    field->clear_default_value();
+    if (ShouldClearLabel(field)) field->clear_label();
+  }
+
+  // Scrubs the fields of `message_type` and, recursively, its nested types.
+  static void ScrubMessage(DescriptorProto *message_type) {
+    // Message-level extensions are deleted outright, so none are left to
+    // scrub. NOTE(review): file-level ones are scrubbed and kept -- confirm
+    // that this asymmetry is intended.
+    message_type->clear_extension();
+    for (int i = 0; i < message_type->field_size(); i++) {
+      ScrubField(message_type->mutable_field(i));
+    }
+    for (int i = 0; i < message_type->nested_type_size(); i++) {
+      ScrubMessage(message_type->mutable_nested_type(i));
+    }
+  }
+};
+
+}  // namespace util
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_