about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
authorGravatar Justin Lebar <jlebar@google.com>2018-04-07 11:42:43 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-04-07 11:45:04 -0700
commit1cd76c209ce6f74298843568a7fc397c2e6f958f (patch)
treec4647ef54eaba837b9a5a1a05b0cf029aaec7b36
parente7ea87f97e03360719d132a71acc1eb2f93c249f (diff)
[XLA:GPU] Eliminate the guard around Winograd non-fused convolutions with cudnn7.
Adds DnnSupport::GetVersion() and uses this to unguard Winograd non-fused convolutions if you're using cudnn7. PiperOrigin-RevId: 192010450
-rw-r--r--tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc30
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc7
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.h1
-rw-r--r--tensorflow/stream_executor/dnn.h7
4 files changed, 33 insertions, 12 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 1792893ae4..d6b457a91b 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -94,11 +94,17 @@ se::port::StatusOr<se::DeviceMemory<uint8>> ScratchAllocator::AllocateBytes(
// Determines whether we can safely perform a winograd non-fused convolution for
// the given input and output shapes. This works around b/68264959, an integer
// overflow in cuDNNv5 and cuDNNv6.
-//
-// TODO(jlebar): We shouldn't need this check for cuDNNv7.
-bool ShouldIncludeWinogradNonfusedAlgo(
- const Shape& input_shape, const Shape& output_shape,
- const ConvolutionDimensionNumbers& dnums) {
+bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape,
+ const Shape& output_shape,
+ const ConvolutionDimensionNumbers& dnums,
+ se::StreamExecutor* stream_exec) {
+ // Skip this check for cudnn7 and newer.
+ se::port::StatusOr<std::tuple<int, int, int>> version =
+ stream_exec->AsDnn()->GetVersion();
+ if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) {
+ return true;
+ }
+
int64 batch = input_shape.dimensions(dnums.input_batch_dimension());
int64 in_depths = input_shape.dimensions(dnums.input_feature_dimension());
int64 in_rows = input_shape.dimensions(dnums.input_spatial_dimensions(0));
@@ -118,20 +124,20 @@ bool ShouldIncludeWinogradNonfusedAlgo(
std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
bool with_winograd_nonfused,
- se::StreamExecutor* stream_exec_) {
+ se::StreamExecutor* stream_exec) {
std::vector<AlgorithmDesc> algorithms;
switch (kind) {
case CudnnConvKind::kBackwardFilter:
- CHECK(stream_exec_->GetConvolveBackwardFilterAlgorithms(
+ CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms(
with_winograd_nonfused, &algorithms));
break;
case CudnnConvKind::kBackwardInput:
- CHECK(stream_exec_->GetConvolveBackwardDataAlgorithms(
+ CHECK(stream_exec->GetConvolveBackwardDataAlgorithms(
with_winograd_nonfused, &algorithms));
break;
case CudnnConvKind::kForward:
- CHECK(stream_exec_->GetConvolveAlgorithms(with_winograd_nonfused,
- &algorithms));
+ CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused,
+ &algorithms));
break;
}
@@ -209,8 +215,8 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
return nullopt;
}
- const bool use_winograd_nonfused =
- ShouldIncludeWinogradNonfusedAlgo(input_shape, output_shape, dnums);
+ const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo(
+ input_shape, output_shape, dnums, stream_exec_);
se::dnn::ProfileResult best_result;
int64 best_result_bytes_used = 0;
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3fd9275289..fa5b90c945 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -478,6 +478,13 @@ port::Status CudnnSupport::Init() {
ToString(status))};
}
+port::StatusOr<std::tuple<int, int, int>> CudnnSupport::GetVersion() {
+ CudnnVersion version;
+ TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version));
+ return std::make_tuple(version.major_version, version.minor_version,
+ version.patch_level);
+}
+
// Turns a BatchDescriptor structure into a cudnn tensor handle within a scope.
class ScopedTensorDescriptor {
public:
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index e40ba9b012..0e5368aca8 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -46,6 +46,7 @@ class CudnnSupport : public dnn::DnnSupport {
~CudnnSupport() override;
port::Status Init() override;
+ port::StatusOr<std::tuple<int, int, int>> GetVersion() override;
port::StatusOr<std::unique_ptr<dnn::RnnDescriptor>> createRnnDescriptor(
int num_layers, int hidden_size, int input_size,
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 43cfd313c1..3c47d2c2e8 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -25,6 +25,7 @@ limitations under the License.
#include <functional>
#include <limits>
#include <memory>
+#include <tuple>
#include "tensorflow/stream_executor/device_memory.h"
#include "tensorflow/stream_executor/lib/array_slice.h"
@@ -885,6 +886,12 @@ class DnnSupport {
virtual port::Status Init() = 0;
+ // Gets the version of the backing library, as a {major, minor, patch} tuple.
+ virtual port::StatusOr<std::tuple<int, int, int>> GetVersion() {
+ return port::UnimplementedError(
+ "DnnSupport::GetVersion not implemented on this platform.");
+ }
+
// Performs a single-precision forward batch normalization operation onto
// the stream.
//