diff options
Diffstat (limited to 'tensorflow/core/util/use_cudnn.cc')
-rw-r--r-- | tensorflow/core/util/use_cudnn.cc | 46 |
1 file changed, 39 insertions, 7 deletions
diff --git a/tensorflow/core/util/use_cudnn.cc b/tensorflow/core/util/use_cudnn.cc index d7d03f151e..c119df6419 100644 --- a/tensorflow/core/util/use_cudnn.cc +++ b/tensorflow/core/util/use_cudnn.cc @@ -22,9 +22,9 @@ limitations under the License. namespace tensorflow { -#define ADD_CUDNN_FLAG(func_name, flag_name, default_value) \ +#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \ bool func_name() { \ - bool value; \ + bool value = default_value; \ Status status = ReadBoolFromEnvVar(#flag_name, default_value, &value); \ if (!status.ok()) { \ LOG(ERROR) << status; \ @@ -32,12 +32,44 @@ namespace tensorflow { return value; \ } -ADD_CUDNN_FLAG(CanUseCudnn, TF_USE_CUDNN, true); -ADD_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true); -ADD_CUDNN_FLAG(CudnnDisableConv1x1Optimization, - TF_CUDNN_DISABLE_CONV_1X1_OPTIMIZATION, false); +ADD_BOOL_CUDNN_FLAG(CanUseCudnn, TF_USE_CUDNN, true); +ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true); +// Whether to auto-tune Cudnn RNN forward and backward pass to pick +// statistically the best cudnnRNNAlgo_t and cudnnMathType_t. +// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on. +ADD_BOOL_CUDNN_FLAG(CudnnRnnUseAutotune, TF_CUDNN_RNN_USE_AUTOTUNE, true); +ADD_BOOL_CUDNN_FLAG(CudnnDisableConv1x1Optimization, - TF_CUDNN_DISABLE_CONV_1X1_OPTIMIZATION, false); -#undef ADD_CUDNN_FLAG +// Whether to run Cudnn RNN forward and backward in debug mode, where users can +// force a specified cudnnRNNAlgo_t and cudnnMathType_t, when used together with +// the following two env vars: +// TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS +// TF_DEBUG_CUDNN_RNN_ALGO +// By default it is disabled and only intended for testing and profiling. +ADD_BOOL_CUDNN_FLAG(DebugCudnnRnn, TF_DEBUG_CUDNN_RNN, false); +// If using TENSOR_OP_MATH in Cudnn RNN for both forward and backward pass. Only +// effective when TF_DEBUG_CUDNN_RNN is true. 
+// Note that none of the persistent RNN algorithms support TENSOR_OP_MATH before +// Cudnn 7.1. See Nvidia Cudnn manual for more details. +ADD_BOOL_CUDNN_FLAG(DebugCudnnRnnUseTensorOps, + TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS, false); +#undef ADD_BOOL_CUDNN_FLAG + +#define ADD_INT64_CUDNN_FLAG(func_name, flag_name, default_value) \ + int64 func_name() { \ + int64 value = default_value; \ + Status status = ReadInt64FromEnvVar(#flag_name, default_value, &value); \ + if (!status.ok()) { \ + LOG(ERROR) << status; \ + } \ + return value; \ + } +// Cudnn RNN algorithm to use for both forward and backward pass. Only effective +// when TF_DEBUG_CUDNN_RNN is true. See Nvidia Cudnn manual for allowed +// cudnnRNNAlgo_t. +ADD_INT64_CUDNN_FLAG(DebugCudnnRnnAlgo, TF_DEBUG_CUDNN_RNN_ALGO, -1); +#undef ADD_INT64_CUDNN_FLAG FP16ConvMode CudnnConvComputeMode() { string value; |