diff options
author | 2016-03-23 09:12:10 -0800 | |
---|---|---|
committer | 2016-03-23 12:10:51 -0700 | |
commit | 0417e6716fb230ab7182325fd58c8b2c090c593d (patch) | |
tree | 1b8d09c5cb4bf50f2bfca06d3fba7dbbdafd1473 | |
parent | 7d471c23a4ea5ac163b7ebc5a137f9650d232348 (diff) |
Modified batch_norm_benchmark to use the new Benchmark API.
To run it now:
bazel run //tensorflow/python:batch_norm_benchmark -- --benchmarks=.. --use_gpu={false/true}
Also a tiny file naming bugfix to run_and_gather_logs_lib.
Change: 117941756
-rw-r--r-- | tensorflow/python/BUILD | 7 | ||||
-rw-r--r-- | tensorflow/python/ops/batch_norm_benchmark.py | 209 | ||||
-rw-r--r-- | tensorflow/tools/test/run_and_gather_logs_lib.py | 1 |
3 files changed, 116 insertions, 101 deletions
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e0bf330fec..2ec8f3c9c3 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1290,14 +1290,9 @@ filegroup( visibility = ["//tensorflow:__subpackages__"], ) -py_binary( +cuda_py_test( name = "batch_norm_benchmark", srcs = [ "ops/batch_norm_benchmark.py", ], - main = "ops/batch_norm_benchmark.py", - srcs_version = "PY2AND3", - deps = [ - "//tensorflow:tensorflow_py", - ], ) diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index 2f6ea1c4b5..534de6ab0f 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -24,7 +24,7 @@ import tensorflow as tf from tensorflow.python.ops import gen_nn_ops FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_boolean("use_gpu", False, """Run GPU benchmarks.""") +tf.app.flags.DEFINE_boolean("use_gpu", True, """Run GPU benchmarks.""") def batch_norm_op(tensor, mean, variance, beta, gamma, scale): @@ -100,116 +100,135 @@ def build_graph(device, input_shape, axes, num_layers, mode, scale, train): return [tensor] -def run_graph(device, input_shape, axes, num_layers, mode, scale, train, - num_iters): - """Run the graph and print its execution time. - - Args: - device: string, the device to run on. - input_shape: shape of the input tensor. - axes: axes that are to be normalized across. - num_layers: number of batch normalization layers in the graph. - mode: "op", "py" or "slow" depending on the implementation. - scale: scale after normalization. - train: if true, also run backprop. - num_iters: number of steps to run. - - Returns: - The duration of the run in seconds. - """ - graph = tf.Graph() - with graph.as_default(): - outputs = build_graph(device, input_shape, axes, num_layers, mode, scale, - train) - with tf.Session(graph=graph) as session: - tf.initialize_all_variables().run() - _ = session.run([out.op for out in outputs]) # warm up. 
- start_time = time.time() - for _ in range(num_iters): - _ = session.run([out.op for out in outputs]) - duration = time.time() - start_time - print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" % - (device, len(input_shape), len(axes), num_layers, mode, scale, train, - duration / num_iters)) - return duration def print_difference(mode, t1, t2): """Print the difference in timing between two runs.""" difference = (t2 - t1) / t1 * 100.0 print("=== %s: %.1f%% ===" % (mode, difference)) -def main(unused_argv): - print("Forward convolution (lower layers).") - shape = [8, 128, 128, 32] - axes = [0, 1, 2] - t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) +class BatchNormBenchmark(tf.test.Benchmark): + """Benchmark batch normalization.""" + + def _run_graph( + self, device, input_shape, axes, num_layers, mode, scale, train, + num_iters): + """Run the graph and print its execution time. + + Args: + device: string, the device to run on. + input_shape: shape of the input tensor. + axes: axes that are to be normalized across. + num_layers: number of batch normalization layers in the graph. + mode: "op", "py" or "slow" depending on the implementation. + scale: scale after normalization. + train: if true, also run backprop. + num_iters: number of steps to run. + + Returns: + The duration of the run in seconds. 
+ """ + graph = tf.Graph() + with graph.as_default(): + outputs = build_graph(device, input_shape, axes, num_layers, mode, scale, + train) + with tf.Session(graph=graph) as session: + tf.initialize_all_variables().run() + _ = session.run([out.op for out in outputs]) # warm up. + start_time = time.time() + for _ in range(num_iters): + _ = session.run([out.op for out in outputs]) + duration = time.time() - start_time + print("%s shape:%d/%d #layers:%d mode:%s scale:%r train:%r - %f secs" % + (device, len(input_shape), len(axes), num_layers, mode, scale, train, + duration / num_iters)) + + name_template = ( + "batch_norm_{device}_input_shape_{shape}_axes_{axes}_mode_{mode}_" + "layers_{num_layers}_scale_{scale}_" + "train_{train}") + + self.report_benchmark( + name=name_template.format( + device=device, mode=mode, num_layers=num_layers, scale=scale, + train=train, + shape=str(input_shape).replace(" ", ""), + axes=str(axes)).replace(" ", ""), + iters=num_iters, wall_time=duration / num_iters) + + return duration + + def benchmark_batch_norm(self): + print("Forward convolution (lower layers).") + shape = [8, 128, 128, 32] + axes = [0, 1, 2] + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward/backward convolution (lower layers).") - t1 = run_graph("cpu", shape, axes, 10, "op", True, True, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + if FLAGS.use_gpu: + t1 
= self._run_graph("gpu", shape, axes, 10, "op", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward/backward convolution (lower layers).") + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward convolution (higher layers).") - shape = [256, 17, 17, 32] - axes = [0, 1, 2] - t1 = run_graph("cpu", shape, axes, 10, "op", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward convolution (higher layers).") + shape = [256, 17, 17, 32] + axes = [0, 1, 2] + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward/backward convolution (higher layers).") - t1 = run_graph("cpu", shape, axes, 10, "op", 
True, True, 5) - t2 = run_graph("cpu", shape, axes, 10, "py", True, True, 5) - t3 = run_graph("cpu", shape, axes, 10, "slow", True, True, 5) - print_difference("op vs py", t1, t2) - print_difference("py vs slow", t2, t3) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "op", True, True, 50) - t2 = run_graph("gpu", shape, axes, 10, "py", True, True, 50) - t3 = run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward/backward convolution (higher layers).") + t1 = self._run_graph("cpu", shape, axes, 10, "op", True, True, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 5) + t3 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 5) print_difference("op vs py", t1, t2) print_difference("py vs slow", t2, t3) - print("Forward fully-connected.") - shape = [1024, 32] - axes = [0] - t1 = run_graph("cpu", shape, axes, 10, "py", True, False, 5) - t2 = run_graph("cpu", shape, axes, 10, "slow", True, False, 5) - print_difference("py vs slow", t1, t2) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "py", True, False, 50) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "op", True, True, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 50) + t3 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 50) + print_difference("op vs py", t1, t2) + print_difference("py vs slow", t2, t3) + print("Forward fully-connected.") + shape = [1024, 32] + axes = [0] + t1 = self._run_graph("cpu", shape, axes, 10, "py", True, False, 5) + t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, False, 5) 
print_difference("py vs slow", t1, t2) - print("Forward/backward fully-connected.") - t1 = run_graph("cpu", shape, axes, 10, "py", True, True, 50) - t2 = run_graph("cpu", shape, axes, 10, "slow", True, True, 50) - print_difference("py vs slow", t1, t2) - if FLAGS.use_gpu: - t1 = run_graph("gpu", shape, axes, 10, "py", True, True, 5) - t2 = run_graph("gpu", shape, axes, 10, "slow", True, True, 5) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "py", True, False, 50) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, False, 50) + print_difference("py vs slow", t1, t2) + print("Forward/backward fully-connected.") + t1 = self._run_graph("cpu", shape, axes, 10, "py", True, True, 50) + t2 = self._run_graph("cpu", shape, axes, 10, "slow", True, True, 50) print_difference("py vs slow", t1, t2) + if FLAGS.use_gpu: + t1 = self._run_graph("gpu", shape, axes, 10, "py", True, True, 5) + t2 = self._run_graph("gpu", shape, axes, 10, "slow", True, True, 5) + print_difference("py vs slow", t1, t2) if __name__ == "__main__": - tf.app.run() + tf.test.main() diff --git a/tensorflow/tools/test/run_and_gather_logs_lib.py b/tensorflow/tools/test/run_and_gather_logs_lib.py index 478a580483..afe8f210cc 100644 --- a/tensorflow/tools/test/run_and_gather_logs_lib.py +++ b/tensorflow/tools/test/run_and_gather_logs_lib.py @@ -92,6 +92,7 @@ def run_and_gather_logs(test_name, test_args): temp_directory = tempfile.mkdtemp(prefix="run_and_gather_logs") mangled_test_name = test_name.strip("/").replace("/", "_").replace(":", "_") test_file_prefix = os.path.join(temp_directory, mangled_test_name) + test_file_prefix = "%s." % test_file_prefix try: if not tf.gfile.Exists(test_executable): |