about summary refs log tree commit diff homepage
path: root/tensorflow/stream_executor/cuda/cuda_dnn.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc45
1 files changed, 36 insertions, 9 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d4f2fd2625..766a0dafb5 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -791,6 +791,11 @@ class CudnnActivationDescriptor {
double relu_ceiling = 0.0;
cudnnActivationMode_t mode;
switch (activation_mode) {
+#if CUDNN_VERSION >= 7100
+ case dnn::ActivationMode::kNone:
+ mode = CUDNN_ACTIVATION_IDENTITY;
+ break;
+#endif
case dnn::ActivationMode::kRelu6:
relu_ceiling = 6.0;
mode = CUDNN_ACTIVATION_CLIPPED_RELU;
@@ -2480,10 +2485,11 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
DeviceMemory<Type>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
- if (activation_mode != dnn::ActivationMode::kRelu) {
+ if (activation_mode != dnn::ActivationMode::kRelu &&
+ activation_mode != dnn::ActivationMode::kNone) {
return port::Status(port::error::INVALID_ARGUMENT,
"cudnnConvolutionBiasActivationForward() only supports "
- "Relu activation.");
+ "Relu or None activation.");
}
CudnnTensorDescriptor conv_input_nd(
@@ -3074,6 +3080,22 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
}
}
+  // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
+  // zero-initialized. The check below conservatively applies the workaround to
+  // all cuDNN 7.x builds (CUDNN_VERSION >= 7000), not only 7.1.4.
+  // TODO(timshen): Add an nvbugs/ link.
+ if (CUDNN_VERSION >= 7000 &&
+ algorithm_config.algorithm().algo_id() ==
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
+ cudnn_type == CUDNN_DATA_HALF &&
+ algorithm_config.algorithm().tensor_ops_enabled() &&
+ input_descriptor.layout() == dnn::DataLayout::kBatchYXDepth &&
+ filter_descriptor.layout() == dnn::FilterLayout::kOutputInputYX &&
+ output_descriptor.layout() == dnn::DataLayout::kBatchDepthYX &&
+ (convolution_descriptor.vertical_filter_stride() > 1 ||
+ convolution_descriptor.horizontal_filter_stride() > 1)) {
+ stream->ThenMemZero(&scratch, scratch.size());
+ }
+
RETURN_IF_CUDNN_ERROR(
cudnnConvolutionBackwardData(cudnn.handle(),
/*alpha=*/alpha,
@@ -3587,7 +3609,7 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<double>* output_data, ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3612,7 +3634,7 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) {
+ DeviceMemory<float>* output_data, ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3637,7 +3659,8 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) {
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3663,7 +3686,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) {
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3692,7 +3716,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) {
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3721,7 +3746,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) {
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3790,7 +3816,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions(
const dnn::BatchDescriptor& dimensions, const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) {
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator) {
// Check for unsupported modes.
if (normalize_descriptor.wrap_around()) {
LOG(ERROR) << "CUDA LRN does not support cudnn-around mode";