Change Autotune to the global scope. Switch to a hash table for the autotune
parameters.

Most benchmarks stay about the same. The first step of Conv2DBenchmark.benchmarkGPUConvStackFirst became 10-20 times faster.
Change: 135718551
author: Xiaoqiang Zheng <zhengxq@google.com> 2016-10-10 13:11:13 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-10-10 14:16:57 -0700
commit: b2b89f08d8e82e190e4d1cc2d680cd28623efa32 (patch)
tree: 33d0e7c40723c1b41cdfdef136e9fa4b595b2171 /tensorflow/python/kernel_tests/conv_ops_test.py
parent: 59f1b5afef6a41bd3187c00d649c8082e8436773 (diff)
1 files changed, 34 insertions, 0 deletions
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 972f1ffa7b..90fb4240cb 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import time
 
 import numpy as np
 import tensorflow as tf
@@ -1211,6 +1212,39 @@ class DeepConv2DTest(tf.test.TestCase):
     self._RunTestCases([1, 1], "SAME")
 
 
+class Conv2DBenchmark(tf.test.Benchmark):
+
+  def benchmarkGPUConvStackFirst(self):
+    # Benchmark the first iteration of a conv-net with many identical conv
+    # operations.
+    if not tf.test.is_gpu_available():
+      return
+
+    with tf.Graph().as_default(), tf.Session() as session:
+      batch_size = 1
+      timesteps = 600
+      features = 1
+
+      inputs = tf.random_uniform(
+          [batch_size, 1, timesteps, features], seed=1234)
+      num_outputs_list = [512] * 40 + [1]
+      kernel_w = 3
+      x = inputs
+      for num_outputs in num_outputs_list:
+        x = tf.contrib.layers.convolution2d(x, num_outputs, [1, kernel_w])
+      outputs = x
+
+      tf.initialize_all_variables().run()
+      num_iterations = 4
+      for iter_index in xrange(num_iterations):
+        start = time.time()
+        session.run(outputs)
+        wall_time = time.time() - start
+        self.report_benchmark(
+            name="conv_stack_iter_%d" % iter_index, wall_time=wall_time)
+        print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time))
+
+
 def GetInceptionFwdTest(input_size, filter_size, stride, padding):
   def Test(self):
     tf.logging.info("Testing InceptionFwd %s", (input_size, filter_size,
author	Xiaoqiang Zheng <zhengxq@google.com>	2016-10-10 13:11:13 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-10-10 14:16:57 -0700
commit	b2b89f08d8e82e190e4d1cc2d680cd28623efa32 (patch)
tree	33d0e7c40723c1b41cdfdef136e9fa4b595b2171 /tensorflow/python/kernel_tests/conv_ops_test.py
parent	59f1b5afef6a41bd3187c00d649c8082e8436773 (diff)