about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/cuda/cuda_dnn.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.cc 75
1 file changed, 45 insertions, 30 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 725f6aeaa4..55408ab9ab 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1986,15 +1986,14 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
Stream* stream, const CudnnHandle& cudnn,
- const dnn::AlgorithmDesc& algorithm_desc,
const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
const CudnnConvolutionDescriptor& conv,
- const CudnnTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& output_nd, dnn::AlgorithmDesc* algorithm_desc,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
// the last call to this function, but should be fixed anyway.
- conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+ conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
// Query the size of the workspace and allocate it.
size_t size_in_bytes;
@@ -2002,8 +2001,14 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
cudnn.handle(),
/*xDesc=*/input_nd.handle(),
/*wDesc=*/filter.handle(), /*convDesc=*/conv.handle(),
- /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(algorithm_desc),
+ /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(*algorithm_desc),
/*sizeInBytes=*/&size_in_bytes));
+
+ if (TF_PREDICT_FALSE(!algorithm_desc)) {
+ return port::Status(port::error::INVALID_ARGUMENT,
+ "No AlgorithmDesc provided");
+ }
+ algorithm_desc->set_scratch_size(size_in_bytes);
int64 size_in_bytes_int64 = size_in_bytes;
if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2028,15 +2033,14 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
port::StatusOr<DeviceMemory<uint8>>
AllocateCudnnConvolutionBackwardDataWorkspace(
Stream* stream, const CudnnHandle& cudnn,
- const dnn::AlgorithmDesc& algorithm_desc,
const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
const CudnnConvolutionDescriptor& conv,
- const CudnnTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& output_nd, dnn::AlgorithmDesc* algorithm_desc,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
// the last call to this function, but should be fixed anyway.
- conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+ conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
// Query the size of the workspace and allocate it.
size_t size_in_bytes;
@@ -2046,8 +2050,14 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
/*dyDesc=*/output_nd.handle(),
/*convDesc=*/conv.handle(),
/*dxDesc=*/input_nd.handle(),
- /*algo=*/ToConvBackwardDataAlgo(algorithm_desc),
+ /*algo=*/ToConvBackwardDataAlgo(*algorithm_desc),
/*sizeInBytes=*/&size_in_bytes));
+
+ if (TF_PREDICT_FALSE(!algorithm_desc)) {
+ return port::Status(port::error::INVALID_ARGUMENT,
+ "No AlgorithmDesc provided");
+ }
+ algorithm_desc->set_scratch_size(size_in_bytes);
int64 size_in_bytes_int64 = size_in_bytes;
if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2072,15 +2082,14 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
port::StatusOr<DeviceMemory<uint8>>
AllocateCudnnConvolutionBackwardFilterWorkspace(
Stream* stream, const CudnnHandle& cudnn,
- const dnn::AlgorithmDesc& algorithm_desc,
const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
const CudnnConvolutionDescriptor& conv,
- const CudnnTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& output_nd, dnn::AlgorithmDesc* algorithm_desc,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
// the last call to this function, but should be fixed anyway.
- conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+ conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
// Query the size of the workspace and allocate it.
size_t size_in_bytes;
@@ -2090,8 +2099,14 @@ AllocateCudnnConvolutionBackwardFilterWorkspace(
/*dyDesc=*/output_nd.handle(),
/*convDesc=*/conv.handle(),
/*gradDesc=*/filter.handle(),
- /*algo=*/ToConvBackwardFilterAlgo(algorithm_desc),
+ /*algo=*/ToConvBackwardFilterAlgo(*algorithm_desc),
/*sizeInBytes=*/&size_in_bytes));
+
+ if (TF_PREDICT_FALSE(!algorithm_desc)) {
+ return port::Status(port::error::INVALID_ARGUMENT,
+ "No AlgorithmDesc provided");
+ }
+ algorithm_desc->set_scratch_size(size_in_bytes);
int64 size_in_bytes_int64 = size_in_bytes;
if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2138,7 +2153,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
}
auto scratch_or = AllocateCudnnConvolutionForwardWorkspace(
- stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+ stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
scratch_allocator);
if (scratch_or.ok()) {
@@ -2155,11 +2170,11 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
"while a secondary algorithm is not provided.");
}
- SE_ASSIGN_OR_RETURN(
- *scratch, AllocateCudnnConvolutionForwardWorkspace(
- stream, cudnn, algorithm_config.algorithm_no_scratch(),
- input_nd, filter, conv, output_nd, scratch_allocator));
- return algorithm_config.algorithm_no_scratch();
+ algo_desc = algorithm_config.algorithm_no_scratch();
+ SE_ASSIGN_OR_RETURN(*scratch, AllocateCudnnConvolutionForwardWorkspace(
+ stream, cudnn, input_nd, filter, conv,
+ output_nd, &algo_desc, scratch_allocator));
+ return algo_desc;
}
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
@@ -2187,7 +2202,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
}
auto scratch_or = AllocateCudnnConvolutionBackwardDataWorkspace(
- stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+ stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
scratch_allocator);
if (scratch_or.ok()) {
@@ -2204,11 +2219,11 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
"while a secondary algorithm is not provided.");
}
- SE_ASSIGN_OR_RETURN(
- *scratch, AllocateCudnnConvolutionBackwardDataWorkspace(
- stream, cudnn, algorithm_config.algorithm_no_scratch(),
- input_nd, filter, conv, output_nd, scratch_allocator));
- return algorithm_config.algorithm_no_scratch();
+ algo_desc = algorithm_config.algorithm_no_scratch();
+ SE_ASSIGN_OR_RETURN(*scratch, AllocateCudnnConvolutionBackwardDataWorkspace(
+ stream, cudnn, input_nd, filter, conv,
+ output_nd, &algo_desc, scratch_allocator));
+ return algo_desc;
}
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
@@ -2236,7 +2251,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
}
auto scratch_or = AllocateCudnnConvolutionBackwardFilterWorkspace(
- stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+ stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
scratch_allocator);
if (scratch_or.ok()) {
@@ -2253,11 +2268,11 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
"while a secondary algorithm is not provided.");
}
- SE_ASSIGN_OR_RETURN(*scratch,
- AllocateCudnnConvolutionBackwardFilterWorkspace(
- stream, cudnn, algorithm_config.algorithm(), input_nd,
- filter, conv, output_nd, scratch_allocator));
- return algorithm_config.algorithm_no_scratch();
+ algo_desc = algorithm_config.algorithm_no_scratch();
+ SE_ASSIGN_OR_RETURN(*scratch, AllocateCudnnConvolutionBackwardFilterWorkspace(
+ stream, cudnn, input_nd, filter, conv,
+ output_nd, &algo_desc, scratch_allocator));
+ return algo_desc;
}
// A helper class to set env-vars and choose options for cudnn-related