diff options
author | 2016-03-23 09:12:10 -0800 | |
---|---|---|
committer | 2016-03-23 12:10:51 -0700 | |
commit | 0417e6716fb230ab7182325fd58c8b2c090c593d (patch) | |
tree | 1b8d09c5cb4bf50f2bfca06d3fba7dbbdafd1473 | |
parent | 7d471c23a4ea5ac163b7ebc5a137f9650d232348 (diff) |
Modified batch_norm_benchmark to use the new Benchmark API.
To run it now:
bazel run //tensorflow/python:batch_norm_benchmark -- --benchmarks=.. --use_gpu={false/true}
Also a tiny file naming bugfix to run_and_gather_logs_lib.
Change: 117941756
-rw-r--r-- | tensorflow/python/BUILD | 7 | ||||
-rw-r--r-- | tensorflow/python/ops/batch_norm_benchmark.py | 209 | ||||
-rw-r--r-- | tensorflow/tools/test/run_and_gather_logs_lib.py | 1 |
3 files changed, 116 insertions, 101 deletions
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e0bf330fec..2ec8f3c9c3 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1290,14 +1290,9 @@ filegroup( visibility = ["//tensorflow:__subpackages__"], ) -py_binary( +cuda_py_test( name = "batch_norm_benchmark", srcs = [ "ops/batch_norm_benchmark.py", ], - main = "ops/batch_norm_benchmark.py", - srcs_version = "PY2AND3", - deps = [ - "//tensorflow:tensorflow_py", - ], ) diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index 2f6ea1c4b5..534de6ab0f 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -24,7 +24,7 @@ import tensorflow as tf from tensorflow.python.ops import gen_nn_ops FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_boolean("use_gpu", False, """Run GPU benchmarks.""") +tf.app.flags.DEFINE_boolean("use_gpu", True, """Run GPU benchmarks.""") def batch_norm_op(tensor, mean, variance, beta, gamma, scale): @@ -100,116 +100,135 @@ def build_graph(device, input_shape, axes, num_layers, mode, scale, train): return [tensor] -def run_graph(device, input_shape, axes, num_layers, mode, scale, train, - num_iters): - """Run the graph and print its execution time. - - Args: - device: string, the device to run on. - input_shape: shape of the input tensor. - axes: axes that are to be normalized across. - num_layers: number of batch normalization layers in the graph. - mode: "op", "py" or "slow" depending on the implementation. - scale: scale after normalization. - train: if true, also run backprop. - num_iters: number of steps to run. - - Returns: - The duration of the run in seconds. - """ - graph = tf.Graph() - with graph.as_default(): - outputs = build_graph(device, input_shape, axes, num_layers, mode, scale, - train) - with tf.Session(graph=graph) as session: - tf.initialize_all_variables().run() - _ = session.run([out.op for out in outputs]) # warm up. 
- start_time = time.time() - for _ in range(num_iters): - _ = session.run([out.op for out in outputs]) - duration = time.time() - start_time - print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" % - (device, len(input_shape), len(axes), num_layers, mode, scale, train, - duration / num_iters)) - return duration def print_difference(mode, t1, t2): """Print the difference in timing between two runs.""" difference = (t2 - t1) / t1 * 100.0 print("=== %s: %.1f%% ===" % (mode, difference)) -def main(unused_argv): - print("Forward convolution (lower layers).") - shape = [8, 128, 128, 32] - axes = [0, 1, 2] - t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) +class BatchNormBenchmark(tf.test.Benchmark): + """Benchmark batch normalization.""" + + def _run_graph( + self, device, input_shape, axes, num_layers, mode, scale, train, + num_iters): + """Run the graph and print its execution time. + + Args: + device: string, the device to run on. + input_shape: shape of the input tensor. + axes: axes that are to be normalized across. + num_layers: number of batch normalization layers in the graph. + mode: "op", "py" or "slow" depending on the implementation. + scale: scale after normalization. + train: if true, also run backprop. + num_iters: number of steps to run. + + Returns: + The duration of the run in seconds. 
+ """ + graph = tf.Graph() + with graph.as_default(): + outputs = build_graph(device, input_shape, axes, num_layers, mode, scale, + train) + with tf.Session(graph=graph) as session: + tf.initialize_all_variables().run() + _ = session.run([out.op for out in outputs]) # warm up. + start_time = time.time() + for _ in range(num_iters): + _ = session.run([out.op for out in outputs]) + duration = time.time() - start_time + print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" % + (device, len(input_shape), len(axes), num_layers, mode, scale, train, + duration / num_iters)) + + name_template = ( + "batch_norm_{device}_input_shape_{shape}_axes_{axes}_mode_{mode}_" + "layers_{num_layers}_scale_{scale}_" + "train_{train}") + + self.report_benchmark( + name=name_template.format( + device=device, mode=mode, num_layers=num_layers, scale=scale, + train=train, + shape=str(input_shape).replace(" ", ""), + axes=str(axes)).replace(" ", ""), + iters=num_iters, wall_time=duration / num_iters) + + return duration + + def benchmark_batch_norm(self): + print("Forward convolution (lower layers).") + shape = [8, 128, 128, 32] + axes = [0, 1, 2] + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward/backward convolution (lower layers).") - t1 = run_graph("cpu", shape, axes, 10, "op", True, True, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + if FLAGS.use_gpu: + t1 
= self._run_graph("gpu", shape, axes, 10, "op", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward/backward convolution (lower layers).") + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward convolution (higher layers).") - shape = [256, 17, 17, 32] - axes = [0, 1, 2] - t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward convolution (higher layers).") + shape = [256, 17, 17, 32] + axes = [0, 1, 2] + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward/backward convolution (higher layers).") - t1 = run_graph("cpu", shape, axes, 10, "op", 
True, True, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward/backward convolution (higher layers).") + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward fully-connected.") - shape = [1024, 32] - axes = [0] - t1 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("py vs slow", t1, t2) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward fully-connected.") + shape = [1024, 32] + axes = [0] + t1 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) 
print_difference("py vs slow", t1, t2) - print("Forward/backward fully-connected.") - t1 = run_graph("cpu", shape, axes, 10, "py", True, True, 50) - t2 = run_graph("cpu", shape, axes, 10, "slow", True, True, 50) - print_difference("py vs slow", t1, t2) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "py", True, True, 5) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 5) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("py vs slow", t1, t2) + print("Forward/backward fully-connected.") + t1 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 50) + t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 50) print_difference("py vs slow", t1, t2) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 5) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 5) + print_difference("py vs slow", t1, t2) if __name__ == "__main__": - tf.app.run() + tf.test.main() diff --git a/tensorflow/tools/test/run_and_gather_logs_lib.py b/tensorflow/tools/test/run_and_gather_logs_lib.py index 478a580483..afe8f210cc 100644 --- a/tensorflow/tools/test/run_and_gather_logs_lib.py +++ b/tensorflow/tools/test/run_and_gather_logs_lib.py @@ -92,6 +92,7 @@ def run_and_gather_logs(test_name, test_args): temp_directory = tempfile.mkdtemp(prefix="run_and_gather_logs") mangled_test_name = test_name.strip("/").replace("/", "_").replace(":", "_") test_file_prefix = os.path.join(temp_directory, mangled_test_name) + test_file_prefix = "%s." % test_file_prefix try: if not tf.gfile.Exists(test_executable): |