author     Eugene Brevdo <ebrevdo@gmail.com>               2016-03-23 09:12:10 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>  2016-03-23 12:10:51 -0700
commit     0417e6716fb230ab7182325fd58c8b2c090c593d (patch)
tree       1b8d09c5cb4bf50f2bfca06d3fba7dbbdafd1473
parent     7d471c23a4ea5ac163b7ebc5a137f9650d232348 (diff)
Modified batch_norm_benchmark to use the new Benchmark API.

To run it now:

    bazel run //tensorflow/..:batch_norm_benchmark -- --benchmarks=.. --use_gpu={false/true}

Also fixes a tiny file-naming bug in run_and_gather_logs_lib.

Change: 117941756
-rw-r--r--  tensorflow/python/BUILD                           |   7
-rw-r--r--  tensorflow/python/ops/batch_norm_benchmark.py     | 209
-rw-r--r--  tensorflow/tools/test/run_and_gather_logs_lib.py  |   1
3 files changed, 116 insertions(+), 101 deletions(-)
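
The core of the change is the move from a standalone main() to TensorFlow's Benchmark API: subclass tf.test.Benchmark, put the timing loop in a method whose name starts with benchmark_, and record each result with self.report_benchmark(). A minimal sketch of that pattern as used in the diff below (the class name and toy matmul workload are illustrative, not part of this commit):

    # Sketch of the tf.test.Benchmark pattern (TF 0.x API); "ToyBenchmark"
    # and the matmul workload are illustrative, not from this commit.
    import time

    import tensorflow as tf


    class ToyBenchmark(tf.test.Benchmark):

      def benchmark_toy_matmul(self):
        with tf.Session() as sess:
          x = tf.random_normal([1024, 1024])
          y = tf.matmul(x, x)
          sess.run(y.op)  # Warm up.
          num_iters = 10
          start_time = time.time()
          for _ in range(num_iters):
            sess.run(y.op)
          duration = time.time() - start_time
        # report_benchmark emits a machine-readable log entry that tooling
        # such as run_and_gather_logs can collect.
        self.report_benchmark(name="toy_matmul", iters=num_iters,
                              wall_time=duration / num_iters)


    if __name__ == "__main__":
      tf.test.main()  # Runs methods selected via --benchmarks=<regex>.

report_benchmark is what makes the timings machine-readable; the print statements in the diff below are kept only for eyeballing the results.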
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e0bf330fec..2ec8f3c9c3 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1290,14 +1290,9 @@ filegroup(
     visibility = ["//tensorflow:__subpackages__"],
 )
-py_binary(
+cuda_py_test(
     name = "batch_norm_benchmark",
     srcs = [
         "ops/batch_norm_benchmark.py",
     ],
-    main = "ops/batch_norm_benchmark.py",
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow:tensorflow_py",
-    ],
 )
diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py
index 2f6ea1c4b5..534de6ab0f 100644
--- a/tensorflow/python/ops/batch_norm_benchmark.py
+++ b/tensorflow/python/ops/batch_norm_benchmark.py
@@ -24,7 +24,7 @@ import tensorflow as tf
 from tensorflow.python.ops import gen_nn_ops
 FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_boolean("use_gpu", False, """Run GPU benchmarks.""")
+tf.app.flags.DEFINE_boolean("use_gpu", True, """Run GPU benchmarks.""")
 def batch_norm_op(tensor, mean, variance, beta, gamma, scale):
@@ -100,116 +100,135 @@ def build_graph(device, input_shape, axes, num_layers, mode, scale, train):
   return [tensor]
-def run_graph(device, input_shape, axes, num_layers, mode, scale, train,
-              num_iters):
-  """Run the graph and print its execution time.
-
-  Args:
-    device: string, the device to run on.
-    input_shape: shape of the input tensor.
-    axes: axes that are to be normalized across.
-    num_layers: number of batch normalization layers in the graph.
-    mode: "op", "py" or "slow" depending on the implementation.
-    scale: scale after normalization.
-    train: if true, also run backprop.
-    num_iters: number of steps to run.
-
-  Returns:
-    The duration of the run in seconds.
-  """
-  graph = tf.Graph()
-  with graph.as_default():
-    outputs = build_graph(device, input_shape, axes, num_layers, mode, scale,
-                          train)
-  with tf.Session(graph=graph) as session:
-    tf.initialize_all_variables().run()
-    _ = session.run([out.op for out in outputs])  # warm up.
-    start_time = time.time()
-    for _ in range(num_iters):
-      _ = session.run([out.op for out in outputs])
-    duration = time.time() - start_time
-  print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" %
-        (device, len(input_shape), len(axes), num_layers, mode, scale, train,
-         duration / num_iters))
-  return duration
 def print_difference(mode, t1, t2):
   """Print the difference in timing between two runs."""
   difference = (t2 - t1) / t1 * 100.0
   print("=== %s: %.1f%% ===" % (mode, difference))
-def main(unused_argv):
-  print("Forward convolution (lower layers).")
-  shape = [8, 128, 128, 32]
-  axes = [0, 1, 2]
-  t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5)
-  t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5)
-  t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
-  print_difference("op vs py", t1, t2)
-  print_difference("py vs slow", t2, t3)
-  if FLAGS.use_gpu:
-    t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50)
-    t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50)
-    t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+class BatchNormBenchmark(tf.test.Benchmark):
+  """Benchmark batch normalization."""
+
+  def _run_graph(
+      self, device, input_shape, axes, num_layers, mode, scale, train,
+      num_iters):
+    """Run the graph and print its execution time.
+
+    Args:
+      device: string, the device to run on.
+      input_shape: shape of the input tensor.
+      axes: axes that are to be normalized across.
+      num_layers: number of batch normalization layers in the graph.
+      mode: "op", "py" or "slow" depending on the implementation.
+      scale: scale after normalization.
+      train: if true, also run backprop.
+      num_iters: number of steps to run.
+
+    Returns:
+      The duration of the run in seconds.
+    """
+    graph = tf.Graph()
+    with graph.as_default():
+      outputs = build_graph(device, input_shape, axes, num_layers, mode, scale,
+                            train)
+    with tf.Session(graph=graph) as session:
+      tf.initialize_all_variables().run()
+      _ = session.run([out.op for out in outputs])  # warm up.
+      start_time = time.time()
+      for _ in range(num_iters):
+        _ = session.run([out.op for out in outputs])
+      duration = time.time() - start_time
+    print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" %
+          (device, len(input_shape), len(axes), num_layers, mode, scale, train,
+           duration / num_iters))
+
+    name_template = (
+        "batch_norm_{device}_input_shape_{shape}_axes_{axes}_mode_{mode}_"
+        "layers_{num_layers}_scale_{scale}_"
+        "train_{train}")
+
+    self.report_benchmark(
+        name=name_template.format(
+            device=device, mode=mode, num_layers=num_layers, scale=scale,
+            train=train,
+            shape=str(input_shape).replace(" ", ""),
+            axes=str(axes)).replace(" ", ""),
+        iters=num_iters, wall_time=duration / num_iters)
+
+    return duration
+
+  def benchmark_batch_norm(self):
+    print("Forward convolution (lower layers).")
+    shape = [8, 128, 128, 32]
+    axes = [0, 1, 2]
+    t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5)
+    t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5)
+    t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
     print_difference("op vs py", t1, t2)
     print_difference("py vs slow", t2, t3)
-  print("Forward/backward convolution (lower layers).")
-  t1 = run_graph("cpu", shape, axes, 10, "op", True, True, 5)
-  t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5)
-  t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5)
-  print_difference("op vs py", t1, t2)
-  print_difference("py vs slow", t2, t3)
-  if FLAGS.use_gpu:
-    t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50)
-    t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50)
-    t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50)
+    if FLAGS.use_gpu:
+      t1 = self._run_graph("gpu", shape, axes, 10, "op", True, False, 50)
+      t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50)
+      t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+      print_difference("op vs py", t1, t2)
+      print_difference("py vs slow", t2, t3)
+    print("Forward/backward convolution (lower layers).")
+    t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5)
+    t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5)
+    t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5)
     print_difference("op vs py", t1, t2)
     print_difference("py vs slow", t2, t3)
-  print("Forward convolution (higher layers).")
-  shape = [256, 17, 17, 32]
-  axes = [0, 1, 2]
-  t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5)
-  t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5)
-  t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
-  print_difference("op vs py", t1, t2)
-  print_difference("py vs slow", t2, t3)
-  if FLAGS.use_gpu:
-    t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50)
-    t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50)
-    t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+    if FLAGS.use_gpu:
+      t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50)
+      t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50)
+      t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50)
+      print_difference("op vs py", t1, t2)
+      print_difference("py vs slow", t2, t3)
+ print("Forward convolution (higher layers).")
+ shape = [256, 17, 17, 32]
+ axes = [0, 1, 2]
+ t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5)
+ t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5)
+ t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
print_difference("op vs py", t1, t2)
print_difference("py vs slow", t2, t3)
- print("Forward/backward convolution (higher layers).")
- t1 = run_graph("cpu", shape, axes, 10, "op", True, True, 5)
- t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5)
- t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5)
- print_difference("op vs py", t1, t2)
- print_difference("py vs slow", t2, t3)
- if FLAGS.use_gpu:
- t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50)
- t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50)
- t3 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50)
+ if FLAGS.use_gpu:
+ t1 = self._run_graph("gpu", shape, axes, 10, "op", True, False, 50)
+ t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50)
+ t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+ print_difference("op vs py", t1, t2)
+ print_difference("py vs slow", t2, t3)
+ print("Forward/backward convolution (higher layers).")
+ t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5)
+ t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5)
+ t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5)
print_difference("op vs py", t1, t2)
print_difference("py vs slow", t2, t3)
- print("Forward fully-connected.")
- shape = [1024, 32]
- axes = [0]
- t1 = run_graph("cpu", shape, axes, 10, "py", True, False, 5)
- t2 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
- print_difference("py vs slow", t1, t2)
- if FLAGS.use_gpu:
- t1 = run_graph("gpu", shape, axes, 10, "py", True, False, 50)
- t2 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+ if FLAGS.use_gpu:
+ t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50)
+ t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50)
+ t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50)
+ print_difference("op vs py", t1, t2)
+ print_difference("py vs slow", t2, t3)
+ print("Forward fully-connected.")
+ shape = [1024, 32]
+ axes = [0]
+ t1 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5)
+ t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5)
print_difference("py vs slow", t1, t2)
- print("Forward/backward fully-connected.")
- t1 = run_graph("cpu", shape, axes, 10, "py", True, True, 50)
- t2 = run_graph("cpu", shape, axes, 10, "slow", True, True, 50)
- print_difference("py vs slow", t1, t2)
- if FLAGS.use_gpu:
- t1 = run_graph("gpu", shape, axes, 10, "py", True, True, 5)
- t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 5)
+ if FLAGS.use_gpu:
+ t1 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50)
+ t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50)
+ print_difference("py vs slow", t1, t2)
+ print("Forward/backward fully-connected.")
+ t1 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 50)
+ t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 50)
print_difference("py vs slow", t1, t2)
+ if FLAGS.use_gpu:
+ t1 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 5)
+ t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 5)
+ print_difference("py vs slow", t1, t2)
if __name__ == "__main__":
- tf.app.run()
+ tf.test.main()
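
For a quick look at a single configuration without going through bazel, the new class can also be driven directly. This is a hypothetical invocation, assuming a TF 0.x runtime and that the module above is importable:

    # Hypothetical direct use of the new class; assumes batch_norm_benchmark
    # is importable from tensorflow.python.ops.
    from tensorflow.python.ops import batch_norm_benchmark

    bench = batch_norm_benchmark.BatchNormBenchmark()
    # Ten stacked batch-norm layers, "op" mode, scale=True, forward only
    # (train=False), five timed iterations on CPU.
    total_secs = bench._run_graph("cpu", [8, 128, 128, 32], [0, 1, 2],
                                  10, "op", True, False, 5)
    print("total: %f secs" % total_secs)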
diff --git a/tensorflow/tools/test/run_and_gather_logs_lib.py b/tensorflow/tools/test/run_and_gather_logs_lib.py
index 478a580483..afe8f210cc 100644
--- a/tensorflow/tools/test/run_and_gather_logs_lib.py
+++ b/tensorflow/tools/test/run_and_gather_logs_lib.py
@@ -92,6 +92,7 @@ def run_and_gather_logs(test_name, test_args):
   temp_directory = tempfile.mkdtemp(prefix="run_and_gather_logs")
   mangled_test_name = test_name.strip("/").replace("/", "_").replace(":", "_")
   test_file_prefix = os.path.join(temp_directory, mangled_test_name)
+  test_file_prefix = "%s." % test_file_prefix
   try:
     if not tf.gfile.Exists(test_executable):
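
The one-line fix above is the file-naming bugfix from the commit message: it makes the temp-file prefix end in a dot, so that a benchmark name appended to the prefix does not fuse with it into one unreadable filename. A toy illustration of the difference, assuming downstream tooling appends the name directly to the prefix (an assumption; that writer is not shown in this diff):

    import os
    import tempfile

    temp_directory = tempfile.mkdtemp(prefix="run_and_gather_logs")
    prefix = os.path.join(temp_directory, "batch_norm_benchmark")
    name = "benchmark_batch_norm"  # Hypothetical appended benchmark name.

    print(prefix + name)          # ...batch_norm_benchmarkbenchmark_batch_norm
    print("%s." % prefix + name)  # ...batch_norm_benchmark.benchmark_batch_norm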