diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.cc | 63
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 1c3940e92c..f3955c3455 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1986,15 +1986,15 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
 port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2002,8 +2002,14 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
       cudnn.handle(),
       /*xDesc=*/input_nd.handle(),
       /*wDesc=*/filter.handle(), /*convDesc=*/conv.handle(),
-      /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(algorithm_desc),
+      /*yDesc=*/output_nd.handle(), /*algo=*/ToConvForwardAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2028,15 +2034,15 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
 port::StatusOr<DeviceMemory<uint8>>
 AllocateCudnnConvolutionBackwardDataWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2046,8 +2052,14 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
       /*dyDesc=*/output_nd.handle(),
       /*convDesc=*/conv.handle(),
       /*dxDesc=*/input_nd.handle(),
-      /*algo=*/ToConvBackwardDataAlgo(algorithm_desc),
+      /*algo=*/ToConvBackwardDataAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2072,15 +2084,15 @@ AllocateCudnnConvolutionBackwardDataWorkspace(
 port::StatusOr<DeviceMemory<uint8>>
 AllocateCudnnConvolutionBackwardFilterWorkspace(
     Stream* stream, const CudnnHandle& cudnn,
-    const dnn::AlgorithmDesc& algorithm_desc,
     const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
     const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd,
+    dnn::AlgorithmDesc* algorithm_desc,
     ScratchAllocator* scratch_allocator) {
   // TODO(csigg): This has side effects on the convolution descriptor. It is
   // functionally correct because the convolution is run with the algorithm of
   // the last call to this function, but should be fixed anyway.
-  conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled());
+  conv.set_use_tensor_op_math(algorithm_desc->tensor_ops_enabled());
 
   // Query the size of the workspace and allocate it.
   size_t size_in_bytes;
@@ -2090,8 +2102,14 @@ AllocateCudnnConvolutionBackwardFilterWorkspace(
       /*dyDesc=*/output_nd.handle(),
       /*convDesc=*/conv.handle(),
       /*gradDesc=*/filter.handle(),
-      /*algo=*/ToConvBackwardFilterAlgo(algorithm_desc),
+      /*algo=*/ToConvBackwardFilterAlgo(*algorithm_desc),
       /*sizeInBytes=*/&size_in_bytes));
+
+  if (TF_PREDICT_FALSE(!algorithm_desc)) {
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No AlgorithmDesc provided");
+  }
+  algorithm_desc->set_scratch_size(size_in_bytes);
 
   int64 size_in_bytes_int64 = size_in_bytes;
   if (TF_PREDICT_FALSE(size_in_bytes_int64 < 0)) {
@@ -2138,7 +2156,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionForwardWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2155,11 +2173,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(
       *scratch,
       AllocateCudnnConvolutionForwardWorkspace(
-          stream, cudnn, algorithm_config.algorithm_no_scratch(),
-          input_nd, filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+          stream, cudnn,
+          input_nd, filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
@@ -2187,7 +2206,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionBackwardDataWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2204,11 +2223,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(
       *scratch,
       AllocateCudnnConvolutionBackwardDataWorkspace(
-          stream, cudnn, algorithm_config.algorithm_no_scratch(),
-          input_nd, filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+          stream, cudnn,
+          input_nd, filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
@@ -2236,7 +2256,7 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
   }
 
   auto scratch_or = AllocateCudnnConvolutionBackwardFilterWorkspace(
-      stream, cudnn, algo_desc, input_nd, filter, conv, output_nd,
+      stream, cudnn, input_nd, filter, conv, output_nd, &algo_desc,
       scratch_allocator);
 
   if (scratch_or.ok()) {
@@ -2253,11 +2273,12 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
         "while a secondary algorithm is not provided.");
   }
 
+  algo_desc = algorithm_config.algorithm_no_scratch();
   SE_ASSIGN_OR_RETURN(*scratch,
                       AllocateCudnnConvolutionBackwardFilterWorkspace(
-                          stream, cudnn, algorithm_config.algorithm(), input_nd,
-                          filter, conv, output_nd, scratch_allocator));
-  return algorithm_config.algorithm_no_scratch();
+                          stream, cudnn, input_nd,
+                          filter, conv, output_nd, &algo_desc, scratch_allocator));
+  return algo_desc;
 }
 
 // A helper class to set env-vars and choose options for cudnn-related