about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r--tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc48
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc57
2 files changed, 64 insertions, 41 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index f528e62b17..9eee9ebbd7 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -76,54 +76,23 @@ StatusOr<se::DeviceMemory<uint8>> ScratchAllocator::AllocateBytes(
return se::DeviceMemory<uint8>(buffer_addr);
}
-// Determines whether we can safely perform a winograd non-fused convolution for
-// the given input and output shapes. This works around b/68264959, an integer
-// overflow in cuDNNv5 and cuDNNv6.
-bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape,
- const Shape& output_shape,
- const ConvolutionDimensionNumbers& dnums,
- se::StreamExecutor* stream_exec) {
- // Skip this check for cudnn7 and newer.
- auto version = stream_exec->AsDnn()->GetVersion();
- if (version.ok() && version.ValueOrDie().major_version() >= 7) {
- return true;
- }
-
- int64 batch = input_shape.dimensions(dnums.input_batch_dimension());
- int64 in_depths = input_shape.dimensions(dnums.input_feature_dimension());
- int64 in_rows = input_shape.dimensions(dnums.input_spatial_dimensions(0));
- int64 in_cols =
- dnums.input_spatial_dimensions_size() == 1
- ? 1
- : input_shape.dimensions(dnums.input_spatial_dimensions(1));
- int64 out_depths = output_shape.dimensions(dnums.output_feature_dimension());
-
- int64 total_size = CeilOfRatio(batch, int64{16}) *
- std::max(in_depths, out_depths) * in_cols * in_rows *
- sizeof(float);
-
- const int64 threshold = 1L << 31;
- return total_size < threshold;
-}
-
std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
- bool with_winograd_nonfused,
se::StreamExecutor* stream_exec) {
std::vector<AlgorithmDesc> algorithms;
+ bool succ = false;
switch (kind) {
case CudnnConvKind::kBackwardFilter:
- CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms(
- with_winograd_nonfused, &algorithms));
+ succ =
+ stream_exec->GetConvolveBackwardFilterAlgorithms(true, &algorithms);
break;
case CudnnConvKind::kBackwardInput:
- CHECK(stream_exec->GetConvolveBackwardDataAlgorithms(
- with_winograd_nonfused, &algorithms));
+ succ = stream_exec->GetConvolveBackwardDataAlgorithms(true, &algorithms);
break;
case CudnnConvKind::kForward:
- CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused,
- &algorithms));
+ succ = stream_exec->GetConvolveAlgorithms(true, &algorithms);
break;
}
+ DCHECK(succ);
return algorithms;
}
@@ -282,8 +251,6 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
}
}();
- const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo(
- input_shape, output_shape, *params.dnums, stream_exec_);
se::dnn::ProfileResult best_result;
int64 best_result_bytes_used = 0;
@@ -292,8 +259,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
// particular reason to use it, as any algorithm suffices. It doesn't make
// this algorithm considered correct, though.
optional<AlgorithmDesc> first_algorithm;
- for (const AlgorithmDesc& alg :
- GetAlgorithms(params.kind, use_winograd_nonfused, stream_exec_)) {
+ for (const AlgorithmDesc& alg : GetAlgorithms(params.kind, stream_exec_)) {
ScratchAllocator scratch_allocator(device_ordinal, allocator);
se::dnn::ProfileResult profile_result;
VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for "
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3a77ba769c..ca90c383f9 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -35,6 +35,7 @@ limitations under the License.
#include "tensorflow/stream_executor/lib/env.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/initialize.h"
+#include "tensorflow/stream_executor/lib/mathutil.h"
#include "tensorflow/stream_executor/lib/strcat.h"
#include "tensorflow/stream_executor/lib/stringpiece.h"
#include "tensorflow/stream_executor/lib/threadpool.h"
@@ -2406,6 +2407,33 @@ cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) {
}
}
+// Determines whether we can safely perform a winograd non-fused convolution for
+// the given input and output shapes. This works around b/68264959, an integer
+// overflow in cuDNNv5 and cuDNNv6.
+#if CUDNN_VERSION >= 7000
+bool ShouldIncludeWinogradNonfusedAlgo(const dnn::BatchDescriptor&,
+ const dnn::BatchDescriptor&) {
+ return true;
+}
+#else
+bool ShouldIncludeWinogradNonfusedAlgo(
+ const dnn::BatchDescriptor& input_desc,
+ const dnn::BatchDescriptor& output_desc) {
+ int64 batch = input_desc.count();
+ int64 in_depths = input_desc.feature_map_count();
+ int64 in_rows = input_desc.height();
+ int64 in_cols = input_desc.ndims() == 1 ? 1 : input_desc.width();
+ int64 out_depths = output_desc.feature_map_count();
+
+ int64 total_size = port::MathUtil::CeilOfRatio(batch, int64{16}) *
+ std::max(in_depths, out_depths) * in_cols * in_rows *
+ sizeof(float);
+
+ const int64 threshold = 1L << 31;
+ return total_size < threshold;
+}
+#endif
+
} // namespace
template <class T>
@@ -2484,6 +2512,13 @@ port::Status CudnnSupport::DoConvolveImpl(
return port::Status::OK();
}());
+ if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+ !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+ return port::Status(port::error::FAILED_PRECONDITION,
+ "This configuration has potential integer overflow in "
+ "cuDNNv5 and cuDNNv6. See b/68264959.");
+ }
+
RETURN_IF_CUDNN_ERROR(cudnnConvolutionForward(
cudnn.handle(),
/*alpha=*/alpha, /*srcDesc=*/input_nd.handle(),
@@ -2588,6 +2623,14 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
<< "\noutput_nd.handle() = " << output_nd.handle()
<< "\noutput_data->opaque() = " << output_data->opaque();
+ if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+ !ShouldIncludeWinogradNonfusedAlgo(conv_input_descriptor,
+ output_descriptor)) {
+ return port::Status(port::error::FAILED_PRECONDITION,
+ "This configuration has potential integer overflow in "
"cuDNNv5 and cuDNNv6. See b/68264959.");
+ }
+
RETURN_IF_CUDNN_ERROR(cudnnConvolutionBiasActivationForward(
cudnn.handle(),
/*alpha1=*/&conv_input_scale,
@@ -3114,6 +3157,13 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
}
}
+ if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+ !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+ return port::Status(port::error::FAILED_PRECONDITION,
+ "This configuration has potential integer overflow in "
+ "cuDNNv5 and cuDNNv6. See b/68264959.");
+ }
+
// Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
// zero-initialized, nvbugs/2254619.
if (CUDNN_VERSION >= 7000 &&
@@ -3293,6 +3343,13 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
"This configuration potentially produces incorrect results.");
}());
+ if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+ !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
+ return port::Status(port::error::FAILED_PRECONDITION,
+ "This configuration has potential integer overflow in "
+ "cuDNNv5 and cuDNNv6. See b/68264959.");
+ }
+
// Zero out the result buffer for strided conv backward filter for NHWC
// layouts. cuDNN 7.1.4 and 7.2 has non-determinisic bug if the buffer is not
// zeroed.