Add the autotune version for the backward passes.

It is currently disabled by default, and can be enabled through the env-var "TF_CUDNN_USE_AUTOTUNE=1". It will eventually be turned on by default.

The following are the benchmarks with large enough changes:

Benchmark                        Base (ns)   New (ns)  Improvement
------------------------------------------------------------------
BM_ConvFloatFwdGPU_conv13          3810933    2167784       +43.1%
BM_ConvFloatFwdGPU_conv23          4173607    2450503       +41.3%
BM_ConvFloatFwdGPU_conv54         26731131    7098361       +73.4%
BM_ConvFloatBkInGPU_conv1          1496407    1039979       +30.5%
BM_ConvFloatBkInGPU_conv2          1501744     999774       +33.4%
BM_ConvFloatBkInGPU_conv12         6826426     968258       +85.8%
BM_ConvFloatBkFilterGPU_conv13     3852185    2110649       +45.2%
BM_ConvFloatBkInGPU_conv15         7011109     910837       +87.0%
BM_ConvFloatBkInGPU_conv17         2724054    1930013       +29.1%
BM_ConvFloatBkInGPU_conv18         2940634    1846089       +37.2%
BM_ConvFloatBkInGPU_conv19         2995599    1853970       +38.1%
BM_ConvFloatBkInGPU_conv22         2685772    1940984       +27.7%
BM_ConvFloatBkInGPU_conv24         2343034    1519468       +35.1%
BM_ConvFloatBkInGPU_conv27         2339471    1516779       +35.2%
BM_ConvFloatBkFilterGPU_conv28     3091452    1880773       +39.2%
BM_ConvFloatBkInGPU_conv31         1265237    1120846       +11.4%
BM_ConvFloatBkInGPU_conv46         3346414    2070659       +38.1%
BM_ConvFloatBkFilterGPU_conv52    20677347   14342254       +30.6%
BM_ConvFloatBkInGPU_conv54        13291278   10495521       +21.0%

Change: 122067373
author: Xiaoqiang Zheng <zhengxq@google.com> 2016-05-11 09:28:36 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-05-11 10:32:20 -0700
commit: 939ede027be73ecafcc422371afe27dceccc720d (patch)
tree: 2ac4b3f35f4e0744d5993271a01185e6bcc8905e /tensorflow/stream_executor/stream_executor_pimpl.h
parent: 1f8fe742e11de53ccbb34d9fa540302156cb1655 (diff)
1 files changed, 9 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 31b110a8e0..e424411143 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -341,6 +341,15 @@ class StreamExecutor {
   // Get the list of supported algorithms for the forward convolution operation.
   bool GetConvolveAlgorithms(std::vector<dnn::AlgorithmType> *out_algorithms);
 
+  // Get the list of supported algorithms for the backward convolution on data.
+  bool GetConvolveBackwardDataAlgorithms(
+      std::vector<dnn::AlgorithmType> *out_algorithms);
+
+  // Get the list of supported algorithms for the backward convolution on the
+  // filter.
+  bool GetConvolveBackwardFilterAlgorithms(
+      std::vector<dnn::AlgorithmType> *out_algorithms);
+
   // Returns the device ordinal that this StreamExecutor was initialized with.
   // Meaningless before initialization.
   int device_ordinal() const { return device_ordinal_; }
author	Xiaoqiang Zheng <zhengxq@google.com>	2016-05-11 09:28:36 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-05-11 10:32:20 -0700
commit	939ede027be73ecafcc422371afe27dceccc720d (patch)
tree	2ac4b3f35f4e0744d5993271a01185e6bcc8905e /tensorflow/stream_executor/stream_executor_pimpl.h
parent	1f8fe742e11de53ccbb34d9fa540302156cb1655 (diff)