Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.cc  45
1 file changed, 36 insertions, 9 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d4f2fd2625..766a0dafb5 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -791,6 +791,11 @@ class CudnnActivationDescriptor {
     double relu_ceiling = 0.0;
     cudnnActivationMode_t mode;
     switch (activation_mode) {
+#if CUDNN_VERSION >= 7100
+      case dnn::ActivationMode::kNone:
+        mode = CUDNN_ACTIVATION_IDENTITY;
+        break;
+#endif
       case dnn::ActivationMode::kRelu6:
         relu_ceiling = 6.0;
         mode = CUDNN_ACTIVATION_CLIPPED_RELU;
@@ -2480,10 +2485,11 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
     DeviceMemory<Type>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  if (activation_mode != dnn::ActivationMode::kRelu) {
+  if (activation_mode != dnn::ActivationMode::kRelu &&
+      activation_mode != dnn::ActivationMode::kNone) {
     return port::Status(port::error::INVALID_ARGUMENT,
                         "cudnnConvolutionBiasActivationForward() only supports "
-                        "Relu activation.");
+                        "Relu or None activation.");
   }
 
   CudnnTensorDescriptor conv_input_nd(
@@ -3074,6 +3080,22 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
     }
   }
 
+  // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
+  // zero-initialized.
+  // TODO(timshen): Add an nvbugs/ link.
+  if (CUDNN_VERSION >= 7000 &&
+      algorithm_config.algorithm().algo_id() ==
+          CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
+      cudnn_type == CUDNN_DATA_HALF &&
+      algorithm_config.algorithm().tensor_ops_enabled() &&
+      input_descriptor.layout() == dnn::DataLayout::kBatchYXDepth &&
+      filter_descriptor.layout() == dnn::FilterLayout::kOutputInputYX &&
+      output_descriptor.layout() == dnn::DataLayout::kBatchDepthYX &&
+      (convolution_descriptor.vertical_filter_stride() > 1 ||
+       convolution_descriptor.horizontal_filter_stride() > 1)) {
+    stream->ThenMemZero(&scratch, scratch.size());
+  }
+
   RETURN_IF_CUDNN_ERROR(
       cudnnConvolutionBackwardData(cudnn.handle(),
                                    /*alpha=*/alpha,
@@ -3587,7 +3609,7 @@ bool CudnnSupport::DoPoolForward(
     const dnn::BatchDescriptor& input_dimensions,
     const DeviceMemory<double>& input_data,
     const dnn::BatchDescriptor& output_dimensions,
-    DeviceMemory<double>* output_data) {
+    DeviceMemory<double>* output_data, ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   double alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3612,7 +3634,7 @@ bool CudnnSupport::DoPoolForward(
     const dnn::BatchDescriptor& input_dimensions,
     const DeviceMemory<float>& input_data,
     const dnn::BatchDescriptor& output_dimensions,
-    DeviceMemory<float>* output_data) {
+    DeviceMemory<float>* output_data, ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   float alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3637,7 +3659,8 @@ bool CudnnSupport::DoPoolForward(
     const dnn::BatchDescriptor& input_dimensions,
     const DeviceMemory<Eigen::half>& input_data,
     const dnn::BatchDescriptor& output_dimensions,
-    DeviceMemory<Eigen::half>* output_data) {
+    DeviceMemory<Eigen::half>* output_data,
+    ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   float alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3663,7 +3686,8 @@ bool CudnnSupport::DoPoolBackward(
     const dnn::BatchDescriptor& output_dimensions,
     const DeviceMemory<double>& output_data,
     const DeviceMemory<double>& input_diff_data,
-    DeviceMemory<double>* output_diff_data) {
+    DeviceMemory<double>* output_diff_data,
+    ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   double alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3692,7 +3716,8 @@ bool CudnnSupport::DoPoolBackward(
     const dnn::BatchDescriptor& output_dimensions,
     const DeviceMemory<float>& output_data,
     const DeviceMemory<float>& input_diff_data,
-    DeviceMemory<float>* output_diff_data) {
+    DeviceMemory<float>* output_diff_data,
+    ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   float alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3721,7 +3746,8 @@ bool CudnnSupport::DoPoolBackward(
     const dnn::BatchDescriptor& output_dimensions,
     const DeviceMemory<Eigen::half>& output_data,
     const DeviceMemory<Eigen::half>& input_diff_data,
-    DeviceMemory<Eigen::half>* output_diff_data) {
+    DeviceMemory<Eigen::half>* output_diff_data,
+    ScratchAllocator* workspace_allocator) {
   // Alpha is the scaling factor for input.
   float alpha = 1.0;
   // Beta is the scaling factor for output.
@@ -3790,7 +3816,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions(
     const dnn::BatchDescriptor& dimensions, const DeviceMemory<float>& raw_data,
     const DeviceMemory<float>& normalized_data,
     const DeviceMemory<float>& normalized_variable_gradient,
-    DeviceMemory<float>* raw_variable_gradient) {
+    DeviceMemory<float>* raw_variable_gradient,
+    ScratchAllocator* workspace_allocator) {
   // Check for unsupported modes.
   if (normalize_descriptor.wrap_around()) {
     LOG(ERROR) << "CUDA LRN does not support wrap-around mode";
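Context for the activation hunks: CUDNN_ACTIVATION_IDENTITY first appeared in cuDNN 7.1, which is why the new kNone case is guarded by CUDNN_VERSION >= 7100, and per the cuDNN documentation identity mode is only accepted by cudnnConvolutionBiasActivationForward() (with the IMPLICIT_PRECOMP_GEMM algorithm), not by cudnnActivationForward(). Hence DoFusedConvolveImpl is the one entry point that now admits kNone. A minimal standalone sketch of the descriptor setup the new case performs (the helper name is hypothetical; error handling elided):

#include <cudnn.h>

// Builds an activation descriptor equivalent to dnn::ActivationMode::kNone.
cudnnActivationDescriptor_t MakeIdentityActivationDescriptor() {
  cudnnActivationDescriptor_t desc;
  cudnnCreateActivationDescriptor(&desc);
  // The trailing coef argument is the ReLU ceiling for
  // CUDNN_ACTIVATION_CLIPPED_RELU (e.g. 6.0 for Relu6); it is
  // ignored for identity mode.
  cudnnSetActivationDescriptor(desc, CUDNN_ACTIVATION_IDENTITY,
                               CUDNN_NOT_PROPAGATE_NAN, /*coef=*/0.0);
  return desc;
}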
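The DoConvolveBackwardDataImpl hunk works around the cuDNN 7.1.4 bug by clearing the scratch buffer on-stream: ThenMemZero enqueues the clear on the same Stream, so it is ordered ahead of the cudnnConvolutionBackwardData call that consumes the buffer. Outside StreamExecutor the equivalent pattern is a plain cudaMemsetAsync on the workspace (a sketch assuming the workspace is already allocated; error handling elided):

#include <cuda_runtime.h>

// Zero a cuDNN workspace on the given stream. Because the memset is
// enqueued on the same stream as the subsequent convolution, it
// completes before cudnnConvolutionBackwardData reads the buffer.
void ZeroWorkspaceOnStream(void* workspace, size_t workspace_size,
                           cudaStream_t stream) {
  cudaMemsetAsync(workspace, 0, workspace_size, stream);
}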
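The remaining hunks only thread a ScratchAllocator* through the pooling and LRN entry points so those paths can obtain per-call workspace on demand instead of owning memory. A sketch of how such an allocator is typically consumed inside one of these methods (an assumption based on the ScratchAllocator::AllocateBytes(Stream*, int64) interface of this era; workspace_size is illustrative, not what cuDNN pooling actually requires):

// Inside a CudnnSupport method, with `stream`, `workspace_size`, and
// `workspace_allocator` in scope (sketch only; not part of this patch):
DeviceMemory<uint8> workspace;
if (workspace_allocator != nullptr) {
  auto allocated = workspace_allocator->AllocateBytes(stream, workspace_size);
  if (allocated.ok()) {
    workspace = allocated.ValueOrDie();
  }
  // On allocation failure the path can fall back to running without a
  // stashed workspace.
}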