diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.cc | 63
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 1c3940e92c..f3955c3455 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1986,15 +1986,15 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
 port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2002,8 +2002,14 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
       cudnn.handle(),
       /*xDesc=*/input_nd.handle(),
       /*wDesc=*/filter.handle(), /*convDesc=*/conv.handle(),
-      /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(algorithm_desc),
+      /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2028,15 +2034,15 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
 port::StatusOr<DeviceMemory<uint8>>
 AllocateCudnnConvolutionBackwardDataWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2046,8 +2052,14 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
       /*dyDesc=*/output_nd.handle(),
       /*convDesc=*/conv.handle(),
       /*dxDesc=*/input_nd.handle(),
-      /*algo=*/ToConvBackwardDataAlgo(algorithm_desc),
+      /*algo=*/ToConvBackwardDataAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2072,15 +2084,15 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
 port::StatusOr<DeviceMemory<uint8>>
 AllocateCudnnConvolutionBackwardFilterWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2090,8 +2102,14 @@ AllocateCudnnConvolutionBackwardFilterWorkspace(
       /*dyDesc=*/output_nd.handle(),
       /*convDesc=*/conv.handle(),
       /*gradDesc=*/filter.handle(),
-      /*algo=*/ToConvBackwardFilterAlgo(algorithm_desc),
+      /*algo=*/ToConvBackwardFilterAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2138,7 +2156,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionForwardWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2155,11 +2173,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(
       *scratch,
       AllocateCudnnConvolutionForwardWorkspace(
-          stream, cudnn, algorithm_config.algorithm_no_scratch(),
-          input_nd, filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+          stream, cudnn,
+          input_nd, filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
@@ -2187,7 +2206,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionBackwardDataWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2204,11 +2223,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(
       *scratch,
       AllocateCudnnConvolutionBackwardDataWorkspace(
-          stream, cudnn, algorithm_config.algorithm_no_scratch(),
-          input_nd, filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+          stream, cudnn,
+          input_nd, filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
@@ -2236,7 +2256,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionBackwardFilterWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2253,11 +2273,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(*scratch,
                       AllocateCudnnConvolutionBackwardFilterWorkspace(
-                          stream, cudnn, algorithm_config.algorithm(), input_nd,
-                          filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+                          stream, cudnn, input_nd,
+                          filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 // A helper class to set env-vars and choose options for cudnn-related