Sanitize rows before BigQuery upload

author: Craig Tiller <ctiller@google.com> 2017-03-10 17:24:45 -0800
committer: Craig Tiller <ctiller@google.com> 2017-03-10 17:24:45 -0800
commit: bbfb25bf708688748fd9bf37c03ea7cfc5bcc410 (patch)
tree: b6172e96d586a39c4e45109393316920f0f8fb69 /tools/profiling/microbenchmarks
parent: 56cea8f2eea9c8cc607bdc2915046d7ba187cd16 (diff)
1 files changed, 15 insertions, 4 deletions
diff --git a/tools/profiling/microbenchmarks/bm2bq.py b/tools/profiling/microbenchmarks/bm2bq.py
index ae59332f1b..ffb11f57d8 100755
--- a/tools/profiling/microbenchmarks/bm2bq.py
+++ b/tools/profiling/microbenchmarks/bm2bq.py
@@ -73,6 +73,14 @@ columns = [
   ('framing_bytes_per_iteration', 'float'),
 ]
 
+SANITIZE = {
+  'integer': int,
+  'float': float,
+  'boolean': bool,
+  'string': str,
+  'timestamp': str,
+}
+
 if sys.argv[1] == '--schema':
   print ',\n'.join('%s:%s' % (k, t.upper()) for k, t in columns)
   sys.exit(0)
@@ -89,7 +97,10 @@ else:
 writer = csv.DictWriter(sys.stdout, [c for c,t in columns])
 
 for row in bm_json.expand_json(js, js2):
-  if 'label' in row:
-    del row['label']
-  del row['cpp_name']
-  writer.writerow(row)
+  sane_row = {}
+  for name, sql_type in columns:
+    if name in row:
+      if row[name] == '': continue
+      sane_row[name] = SANITIZE[sql_type](row[name])
+  writer.writerow(sane_row)
+
author	Craig Tiller <ctiller@google.com>	2017-03-10 17:24:45 -0800
committer	Craig Tiller <ctiller@google.com>	2017-03-10 17:24:45 -0800
commit	bbfb25bf708688748fd9bf37c03ea7cfc5bcc410 (patch)
tree	b6172e96d586a39c4e45109393316920f0f8fb69 /tools/profiling/microbenchmarks
parent	56cea8f2eea9c8cc607bdc2915046d7ba187cd16 (diff)