diff options
author | Tim Shen <timshen@google.com> | 2018-09-21 18:40:52 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-21 18:44:52 -0700 |
commit | 1cb8940078f6be9313899734e1307a69fffc4b6f (patch) | |
tree | 7b5711f94b09d6a0b5ddcbaa836bb68b13196c9a /tensorflow | |
parent | 086183579a59e07fc9b1ebbfa6516258da0a215b (diff) |
Move winograd algorithm workaround to stream executor.
PiperOrigin-RevId: 214075796
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc | 48 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 57 |
2 files changed, 64 insertions, 41 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index f528e62b17..9eee9ebbd7 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -76,54 +76,23 @@ StatusOr<se::DeviceMemory<uint8>> ScratchAllocator::AllocateBytes( return se::DeviceMemory<uint8>(buffer_addr); } -// Determines whether we can safely perform a winograd non-fused convolution for -// the given input and output shapes. This works around b/68264959, an integer -// overflow in cuDNNv5 and cuDNNv6. -bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape, - const Shape& output_shape, - const ConvolutionDimensionNumbers& dnums, - se::StreamExecutor* stream_exec) { - // Skip this check for cudnn7 and newer. - auto version = stream_exec->AsDnn()->GetVersion(); - if (version.ok() && version.ValueOrDie().major_version() >= 7) { - return true; - } - - int64 batch = input_shape.dimensions(dnums.input_batch_dimension()); - int64 in_depths = input_shape.dimensions(dnums.input_feature_dimension()); - int64 in_rows = input_shape.dimensions(dnums.input_spatial_dimensions(0)); - int64 in_cols = - dnums.input_spatial_dimensions_size() == 1 - ? 1 - : input_shape.dimensions(dnums.input_spatial_dimensions(1)); - int64 out_depths = output_shape.dimensions(dnums.output_feature_dimension()); - - int64 total_size = CeilOfRatio(batch, int64{16}) * - std::max(in_depths, out_depths) * in_cols * in_rows * - sizeof(float); - - const int64 threshold = 1L << 31; - return total_size < threshold; -} - std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind, - bool with_winograd_nonfused, se::StreamExecutor* stream_exec) { std::vector<AlgorithmDesc> algorithms; + bool succ = false; switch (kind) { case CudnnConvKind::kBackwardFilter: - CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms( - with_winograd_nonfused, &algorithms)); + succ = + stream_exec->GetConvolveBackwardFilterAlgorithms(true, &algorithms); break; case CudnnConvKind::kBackwardInput: - CHECK(stream_exec->GetConvolveBackwardDataAlgorithms( - with_winograd_nonfused, &algorithms)); + succ = stream_exec->GetConvolveBackwardDataAlgorithms(true, &algorithms); break; case CudnnConvKind::kForward: - CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused, - &algorithms)); + succ = stream_exec->GetConvolveAlgorithms(true, &algorithms); break; } + DCHECK(succ); return algorithms; } @@ -282,8 +251,6 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( } }(); - const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo( - input_shape, output_shape, *params.dnums, stream_exec_); se::dnn::ProfileResult best_result; int64 best_result_bytes_used = 0; @@ -292,8 +259,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( // particular reason to use it, as any algorithm sufficies. It doesn't make // this algorithm considered correct, though. optional<AlgorithmDesc> first_algorithm; - for (const AlgorithmDesc& alg : - GetAlgorithms(params.kind, use_winograd_nonfused, stream_exec_)) { + for (const AlgorithmDesc& alg : GetAlgorithms(params.kind, stream_exec_)) { ScratchAllocator scratch_allocator(device_ordinal, allocator); se::dnn::ProfileResult profile_result; VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for " diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 3a77ba769c..ca90c383f9 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/lib/error.h" #include "tensorflow/stream_executor/lib/initialize.h" +#include "tensorflow/stream_executor/lib/mathutil.h" #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/lib/threadpool.h" @@ -2406,6 +2407,33 @@ cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) { } } +// Determines whether we can safely perform a winograd non-fused convolution for +// the given input and output shapes. This works around b/68264959, an integer +// overflow in cuDNNv5 and cuDNNv6. +#if CUDNN_VERSION >= 7000 +bool ShouldIncludeWinogradNonfusedAlgo(const dnn::BatchDescriptor&, + const dnn::BatchDescriptor&) { + return true; +} +#else +bool ShouldIncludeWinogradNonfusedAlgo( + const dnn::BatchDescriptor& input_desc, + const dnn::BatchDescriptor& output_desc) { + int64 batch = input_desc.count(); + int64 in_depths = input_desc.feature_map_count(); + int64 in_rows = input_desc.height(); + int64 in_cols = input_desc.ndims() == 1 ? 1 : input_desc.width(); + int64 out_depths = output_desc.feature_map_count(); + + int64 total_size = port::MathUtil::CeilOfRatio(batch, int64{16}) * + std::max(in_depths, out_depths) * in_cols * in_rows * + sizeof(float); + + const int64 threshold = 1L << 31; + return total_size < threshold; +} +#endif + } // namespace template <class T> @@ -2484,6 +2512,13 @@ port::Status CudnnSupport::DoConvolveImpl( return port::Status::OK(); }()); + if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED && + !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) { + return port::Status(port::error::FAILED_PRECONDITION, + "This configuration has potential integer overflow in " + "cuDNNv5 and cuDNNv6. See b/68264959."); + } + RETURN_IF_CUDNN_ERROR(cudnnConvolutionForward( cudnn.handle(), /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), @@ -2588,6 +2623,14 @@ port::Status CudnnSupport::DoFusedConvolveImpl( << "\noutput_nd.handle() = " << output_nd.handle() << "\noutput_data->opaque() = " << output_data->opaque(); + if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED && + !ShouldIncludeWinogradNonfusedAlgo(conv_input_descriptor, + output_descriptor)) { + return port::Status(port::error::FAILED_PRECONDITION, + "This configuration has potential integer overflow in " + "cuDNNv5 and cuDNNv6. See around b/68264959."); + } + RETURN_IF_CUDNN_ERROR(cudnnConvolutionBiasActivationForward( cudnn.handle(), /*alpha1=*/&conv_input_scale, @@ -3114,6 +3157,13 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl( } } + if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED && + !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) { + return port::Status(port::error::FAILED_PRECONDITION, + "This configuration has potential integer overflow in " + "cuDNNv5 and cuDNNv6. See b/68264959."); + } + // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not // zero-initialized, nvbugs/2254619. if (CUDNN_VERSION >= 7000 && @@ -3293,6 +3343,13 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl( "This configuration potentially produces incorrect results."); }()); + if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED && + !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) { + return port::Status(port::error::FAILED_PRECONDITION, + "This configuration has potential integer overflow in " + "cuDNNv5 and cuDNNv6. See b/68264959."); + } + // Zero out the result buffer for strided conv backward filter for NHWC // layouts. cuDNN 7.1.4 and 7.2 has non-determinisic bug if the buffer is not // zeroed. |