aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/cuda
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-06-12 05:15:55 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-06-12 05:18:34 -0700
commit7076ae10ed39d7e1870595347e11f3a99b9410d0 (patch)
treef1c7cbcb194156c3d1534a8961651367ff64992b /tensorflow/stream_executor/cuda
parent1f1e88a681d5d6dea966033acf9b7e235913a35f (diff)
Unify cuDNN descriptor wrapper names.
No functional changes. PiperOrigin-RevId: 200199956
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc255
1 files changed, 124 insertions, 131 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 48afc06e32..d4f2fd2625 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -495,10 +495,10 @@ PersistentRnnPlan CreatePersistentRnnPlan(cudnnRNNDescriptor_t rnn_desc,
// Turns a BatchDescriptor structure into a cudnn tensor handle within a
// scope.
-class ScopedTensorDescriptor {
+class CudnnTensorDescriptor {
public:
- ScopedTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor,
- cudnnDataType_t elem_type)
+ CudnnTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor,
+ cudnnDataType_t elem_type)
: handle_(CreateTensorDescriptor()) {
switch (batch_descriptor.layout()) {
case dnn::DataLayout::kBatchYXDepth:
@@ -540,15 +540,15 @@ class ScopedTensorDescriptor {
private:
TensorDescriptor handle_;
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedTensorDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnTensorDescriptor);
};
// Turns a FilterDescriptor structure into a cudnn filter handle within a
// scope.
-class ScopedFilterDescriptor {
+class CudnnFilterDescriptor {
public:
- ScopedFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor,
- cudnnDataType_t elem_type)
+ CudnnFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor,
+ cudnnDataType_t elem_type)
: handle_(CreateFilterDescriptor()) {
// TODO(b/23032134): Even if the filter layout is not supported,
// cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because
@@ -586,7 +586,7 @@ class ScopedFilterDescriptor {
private:
FilterDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedFilterDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnFilterDescriptor);
};
// A helper function to decide whether to enable the TENSOR_OP_MATH math type
@@ -636,9 +636,9 @@ bool BatchnormSpatialPersistentEnabled() {
// Turns a ConvolutionDescriptor structure into a cudnn convolution handle
// within a scope.
-class ScopedConvolutionDescriptor {
+class CudnnConvolutionDescriptor {
public:
- ScopedConvolutionDescriptor(
+ CudnnConvolutionDescriptor(
const dnn::ConvolutionDescriptor& convolution_descriptor,
cudnnDataType_t data_type)
: handle_(CreateConvolutionDescriptor()) {
@@ -700,14 +700,14 @@ class ScopedConvolutionDescriptor {
private:
ConvolutionDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnConvolutionDescriptor);
};
// Turns a PoolingDescriptor structure into a cudnn pooling descriptor handle
// within a scope.
-class ScopedPoolingDescriptor {
+class CudnnPoolingDescriptor {
public:
- explicit ScopedPoolingDescriptor(
+ explicit CudnnPoolingDescriptor(
const dnn::PoolingDescriptor& pooling_descriptor)
: handle_(CreatePoolingDescriptor()) {
const std::vector<int64> strides64 = pooling_descriptor.strides();
@@ -739,13 +739,13 @@ class ScopedPoolingDescriptor {
private:
PoolingDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnPoolingDescriptor);
};
// Turns a NormalizeDescriptor structure into a cudnn LRN descriptor handle.
-class ScopedNormalizeDescriptor {
+class CudnnNormalizeDescriptor {
public:
- explicit ScopedNormalizeDescriptor(
+ explicit CudnnNormalizeDescriptor(
const dnn::NormalizeDescriptor& normalize_descriptor)
: handle_(CreateLrnDescriptor()) {
// The range specifies that the indices in the closed range
@@ -777,16 +777,16 @@ class ScopedNormalizeDescriptor {
private:
LrnDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedNormalizeDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnNormalizeDescriptor);
};
// Turns a ActivationDescriptor structure into a cudnn activation
// descriptor handle within a scope.
-class ScopedActivationDescriptor {
+class CudnnActivationDescriptor {
public:
- ScopedActivationDescriptor(dnn::ActivationMode activation_mode,
- cudnnNanPropagation_t nan_propagation,
- double value_max)
+ CudnnActivationDescriptor(dnn::ActivationMode activation_mode,
+ cudnnNanPropagation_t nan_propagation,
+ double value_max)
: handle_(CreateActivationDescriptor()) {
double relu_ceiling = 0.0;
cudnnActivationMode_t mode;
@@ -822,7 +822,7 @@ class ScopedActivationDescriptor {
private:
ActivationDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedActivationDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnActivationDescriptor);
};
cudnnDataType_t ToCudnnDataType(
@@ -888,21 +888,21 @@ int CudnnDataTypeToByteSize(cudnnDataType_t data_type) {
}
}
-class ScopedDropoutDescriptor {
- explicit ScopedDropoutDescriptor(DropoutDescriptor handle)
+class CudnnDropoutDescriptor {
+ explicit CudnnDropoutDescriptor(DropoutDescriptor handle)
: handle_(std::move(handle)) {}
public:
- ScopedDropoutDescriptor(ScopedDropoutDescriptor&&) = default;
+ CudnnDropoutDescriptor(CudnnDropoutDescriptor&&) = default;
- static port::StatusOr<ScopedDropoutDescriptor> Create(
+ static port::StatusOr<CudnnDropoutDescriptor> Create(
const CudnnHandle& cudnn, float dropout, uint64 seed,
ScratchAllocator* state_allocator) {
DropoutDescriptor handle = CreateDropoutDescriptor();
if (dropout == 0.0f) {
// Return 'empty' dropout descriptor.
- return ScopedDropoutDescriptor(std::move(handle));
+ return CudnnDropoutDescriptor(std::move(handle));
}
DeviceMemory<uint8> state_memory;
@@ -917,14 +917,14 @@ class ScopedDropoutDescriptor {
handle.get(), cudnn.handle(), dropout, state_memory.opaque(),
state_memory.size(), seed));
- return ScopedDropoutDescriptor(std::move(handle));
+ return CudnnDropoutDescriptor(std::move(handle));
}
cudnnDropoutDescriptor_t handle() const { return handle_.get(); }
private:
DropoutDescriptor handle_; // Owned.
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedDropoutDescriptor);
+ SE_DISALLOW_COPY_AND_ASSIGN(CudnnDropoutDescriptor);
};
class CudnnRnnParamsDescriptor {
@@ -973,7 +973,7 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
cudnnRNNMode_t rnn_mode, cudnnDataType_t data_type,
cudnnDataType_t compute_type,
const dnn::AlgorithmConfig& algorithm_config,
- ScopedDropoutDescriptor dropout_desc,
+ CudnnDropoutDescriptor dropout_desc,
CudnnRnnParamsDescriptor params_desc)
: rnn_desc_(std::move(rnn_desc)),
rnn_plan_(std::move(rnn_plan)),
@@ -1002,8 +1002,8 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
const dnn::AlgorithmConfig& algorithm_config, float dropout, uint64 seed,
ScratchAllocator* state_allocator) {
SE_ASSIGN_OR_RETURN(
- ScopedDropoutDescriptor dropout_desc,
- ScopedDropoutDescriptor::Create(cudnn, dropout, seed, state_allocator));
+ CudnnDropoutDescriptor dropout_desc,
+ CudnnDropoutDescriptor::Create(cudnn, dropout, seed, state_allocator));
cuda::RnnDescriptor rnn_desc = CreateRnnDescriptor();
cudnnRNNAlgo_t rnn_algo = ToCudnnRNNAlgo(algorithm_config.algorithm());
@@ -1097,7 +1097,7 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
cudnnDataType_t data_type_;
cudnnDataType_t compute_type_;
dnn::AlgorithmConfig algorithm_config_;
- ScopedDropoutDescriptor dropout_desc_;
+ CudnnDropoutDescriptor dropout_desc_;
CudnnRnnParamsDescriptor params_desc_;
SE_DISALLOW_COPY_AND_ASSIGN(CudnnRnnDescriptor);
};
@@ -1926,10 +1926,9 @@ namespace {
// and backward filter.
port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
- const CudnnHandle& cudnn, const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd, bool specify_workspace_limit,
+ const CudnnHandle& cudnn, const CudnnTensorDescriptor& input_nd,
+ const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit,
size_t memory_limit_bytes) {
cudnnConvolutionFwdPreference_t preference =
specify_workspace_limit ? CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
@@ -1943,10 +1942,10 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
port::StatusOr<cudnnConvolutionBwdDataAlgo_t>
GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& input_nd,
+ const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd,
bool specify_workspace_limit,
size_t memory_limit_bytes) {
cudnnConvolutionBwdDataPreference_t preference =
@@ -1962,10 +1961,10 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
port::StatusOr<cudnnConvolutionBwdFilterAlgo_t>
GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& input_nd,
+ const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd,
bool specify_workspace_limit,
size_t memory_limit_bytes) {
cudnnConvolutionBwdFilterPreference_t preference =
@@ -1982,10 +1981,9 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmDesc& algorithm_desc,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
@@ -2025,10 +2023,9 @@ port::StatusOr<DeviceMemory<uint8>>
AllocateCudnnConvolutionBackwardDataWorkspace(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmDesc& algorithm_desc,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
@@ -2070,10 +2067,9 @@ port::StatusOr<DeviceMemory<uint8>>
AllocateCudnnConvolutionBackwardFilterWorkspace(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmDesc& algorithm_desc,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd,
ScratchAllocator* scratch_allocator) {
// TODO(csigg): This has side effects on the convolution descriptor. It is
// functionally correct because the convolution is run with the algorithm of
@@ -2114,11 +2110,10 @@ AllocateCudnnConvolutionBackwardFilterWorkspace(
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmConfig& algorithm_config,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
- ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) {
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator,
+ DeviceMemory<uint8>* scratch) {
dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm();
if (algorithm_config.algorithm().is_default()) {
// Pick fastest algorithm within memory limit according to cuDNN's
@@ -2164,11 +2159,10 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmConfig& algorithm_config,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
- ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) {
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator,
+ DeviceMemory<uint8>* scratch) {
dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm();
if (algorithm_config.algorithm().is_default()) {
// Pick fastest algorithm within memory limit according to cuDNN's
@@ -2214,11 +2208,10 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
Stream* stream, const CudnnHandle& cudnn,
const dnn::AlgorithmConfig& algorithm_config,
- const ScopedTensorDescriptor& input_nd,
- const ScopedFilterDescriptor& filter,
- const ScopedConvolutionDescriptor& conv,
- const ScopedTensorDescriptor& output_nd,
- ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) {
+ const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter,
+ const CudnnConvolutionDescriptor& conv,
+ const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator,
+ DeviceMemory<uint8>* scratch) {
dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm();
if (algorithm_config.algorithm().is_default()) {
// Pick fastest algorithm within memory limit according to cuDNN's
@@ -2387,11 +2380,11 @@ port::Status CudnnSupport::DoConvolveImpl(
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
- ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type);
- ScopedTensorDescriptor output_nd(output_descriptor, cudnn_type);
- ScopedFilterDescriptor filter(filter_descriptor, cudnn_type);
- ScopedConvolutionDescriptor conv(convolution_descriptor,
- GetConvComputeType<T>());
+ CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
+ CudnnTensorDescriptor output_nd(output_descriptor, cudnn_type);
+ CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
+ CudnnConvolutionDescriptor conv(convolution_descriptor,
+ GetConvComputeType<T>());
auto cudnn = cudnn_->GetHandle(parent_, stream);
// Alpha is the scaling factor for input.
@@ -2493,14 +2486,14 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
"Relu activation.");
}
- ScopedTensorDescriptor conv_input_nd(
+ CudnnTensorDescriptor conv_input_nd(
conv_input_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type));
- ScopedTensorDescriptor output_nd(
+ CudnnTensorDescriptor output_nd(
output_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type));
- ScopedFilterDescriptor filter(filter_descriptor,
- static_cast<cudnnDataType_t>(cudnn_data_type));
- ScopedTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT);
- ScopedConvolutionDescriptor conv(
+ CudnnFilterDescriptor filter(filter_descriptor,
+ static_cast<cudnnDataType_t>(cudnn_data_type));
+ CudnnTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT);
+ CudnnConvolutionDescriptor conv(
convolution_descriptor, static_cast<cudnnDataType_t>(cudnn_compute_type));
auto cudnn = cudnn_->GetHandle(parent_, stream);
@@ -2528,7 +2521,7 @@ port::Status CudnnSupport::DoFusedConvolveImpl(
// activation descriptor. Note that this will change the nan propagation
// behavior from separate conv, bias, and relu (which by default is
// CUDNN_PROPAGATE_NAN.
- ScopedActivationDescriptor activation_desc(
+ CudnnActivationDescriptor activation_desc(
activation_mode, CUDNN_NOT_PROPAGATE_NAN, output_descriptor.value_max());
auto side_input_data_ptr = (side_input_scale == 0) ? output_data->opaque()
: side_input_data.opaque();
@@ -2740,8 +2733,8 @@ port::Status CudnnSupport::DoBatchNormalizationForwardImpl(
DeviceMemory<U>* saved_mean, DeviceMemory<U>* saved_inv_var,
bool is_training, std::function<const DeviceMemory<U>&()> var_to_inv_var,
std::function<void()> inv_var_to_var) {
- ScopedTensorDescriptor x_descriptor(x_desc, ToCudnnDataType(input_data_type));
- ScopedTensorDescriptor scale_offset_descriptor(
+ CudnnTensorDescriptor x_descriptor(x_desc, ToCudnnDataType(input_data_type));
+ CudnnTensorDescriptor scale_offset_descriptor(
scale_offset_desc, ToCudnnDataType(scale_data_type));
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
#if CUDNN_VERSION >= 7000
@@ -2825,9 +2818,9 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl(
const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop,
DeviceMemory<U>* offset_backprop) {
- ScopedTensorDescriptor x_descriptor(
+ CudnnTensorDescriptor x_descriptor(
x_desc, static_cast<cudnnDataType_t>(cudnn_input_type));
- ScopedTensorDescriptor scale_offset_descriptor(
+ CudnnTensorDescriptor scale_offset_descriptor(
scale_offset_desc, static_cast<cudnnDataType_t>(cudnn_scale_type));
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
#if CUDNN_VERSION >= 7000
@@ -3017,9 +3010,9 @@ bool CudnnSupport::DoTransformTensor(Stream* stream,
dnn::DataType output_type, float scale,
DeviceMemoryBase* output_data) {
float beta = 0.0f;
- ScopedTensorDescriptor input_tensor_desc(
+ CudnnTensorDescriptor input_tensor_desc(
input_desc, ToCudnnDataType(input_type, input_desc.layout()));
- ScopedTensorDescriptor output_tensor_desc(
+ CudnnTensorDescriptor output_tensor_desc(
output_desc, ToCudnnDataType(output_type, output_desc.layout()));
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3056,11 +3049,11 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
auto cudnn = cudnn_->GetHandle(parent_, stream);
- ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
- ScopedTensorDescriptor in_back_nd(input_descriptor, cudnn_type);
- ScopedFilterDescriptor filter(filter_descriptor, cudnn_type);
- ScopedConvolutionDescriptor conv(convolution_descriptor,
- GetConvComputeType<T>());
+ CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
+ CudnnTensorDescriptor in_back_nd(input_descriptor, cudnn_type);
+ CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
+ CudnnConvolutionDescriptor conv(convolution_descriptor,
+ GetConvComputeType<T>());
const bool is_profiling = output_profile_result != nullptr;
@@ -3192,11 +3185,11 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
auto cudnn = cudnn_->GetHandle(parent_, stream);
- ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
- ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type);
- ScopedFilterDescriptor filter(filter_descriptor, cudnn_type);
- ScopedConvolutionDescriptor conv(convolution_descriptor,
- GetConvComputeType<T>());
+ CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
+ CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
+ CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
+ CudnnConvolutionDescriptor conv(convolution_descriptor,
+ GetConvComputeType<T>());
const bool is_profiling = output_profile_result != nullptr;
@@ -3338,8 +3331,8 @@ port::Status CudnnSupport::DoConvolveBackwardBiasImpl(
const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<T>* backward_bias_data) {
cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
- ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type);
- ScopedTensorDescriptor bias_nd(bias_descriptor, cudnn_type);
+ CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
+ CudnnTensorDescriptor bias_nd(bias_descriptor, cudnn_type);
// Alpha is the scaling factor for input.
float alpha = 1.0;
@@ -3526,7 +3519,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream,
const DeviceMemory<float>& biases,
const dnn::BatchDescriptor& dimensions,
DeviceMemory<float>* output_data) {
- ScopedTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT);
dnn::BatchDescriptor bias_dimensions;
bias_dimensions.set_count(1)
@@ -3534,7 +3527,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream,
.set_height(1)
.set_width(1)
.set_layout(dnn::DataLayout::kBatchYXDepth);
- ScopedTensorDescriptor bias_descriptor(bias_dimensions, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor bias_descriptor(bias_dimensions, CUDNN_DATA_FLOAT);
// cudnnAddTensor after R3 is in-place, so we need to copy input_data to
// output_data before doing the addition, unless the input and
@@ -3570,10 +3563,10 @@ bool CudnnSupport::DoActivate(Stream* stream,
const DeviceMemory<float>& input_data,
DeviceMemory<float>* output_data,
uint64 options) {
- ScopedActivationDescriptor activation_desc(
+ CudnnActivationDescriptor activation_desc(
activation_mode, CUDNN_PROPAGATE_NAN, dimensions.value_max());
- ScopedTensorDescriptor input_nd(dimensions, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor input_nd(dimensions, CUDNN_DATA_FLOAT);
// Alpha is the input scaling factor.
float alpha = 1.0;
// Beta is the output scaling factor.
@@ -3600,9 +3593,9 @@ bool CudnnSupport::DoPoolForward(
// Beta is the scaling factor for output.
double beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3625,9 +3618,9 @@ bool CudnnSupport::DoPoolForward(
// Beta is the scaling factor for output.
float beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3650,9 +3643,9 @@ bool CudnnSupport::DoPoolForward(
// Beta is the scaling factor for output.
float beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
RETURN_IF_CUDNN_ERROR(cudnnPoolingForward(
@@ -3676,9 +3669,9 @@ bool CudnnSupport::DoPoolBackward(
// Beta is the scaling factor for output.
double beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3705,9 +3698,9 @@ bool CudnnSupport::DoPoolBackward(
// Beta is the scaling factor for output.
float beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3734,9 +3727,9 @@ bool CudnnSupport::DoPoolBackward(
// Beta is the scaling factor for output.
float beta = 0.0;
- ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF);
- ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF);
- ScopedPoolingDescriptor pooling_desc(pooling_dimensions);
+ CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF);
+ CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF);
+ CudnnPoolingDescriptor pooling_desc(pooling_dimensions);
auto cudnn = cudnn_->GetHandle(parent_, stream);
auto status = [&] {
@@ -3771,8 +3764,8 @@ bool CudnnSupport::DoNormalizeWithDimensions(
return false;
}
- ScopedTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT);
- ScopedNormalizeDescriptor normalize(normalize_descriptor);
+ CudnnTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT);
+ CudnnNormalizeDescriptor normalize(normalize_descriptor);
// Alpha is the scaling factor for input.
float alpha = 1.0f;
@@ -3808,8 +3801,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions(
return false;
}
- ScopedTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT);
- ScopedNormalizeDescriptor normalize(normalize_descriptor);
+ CudnnTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT);
+ CudnnNormalizeDescriptor normalize(normalize_descriptor);
float alpha = 1.0f;
float beta = 0.0f;
@@ -3932,9 +3925,9 @@ bool CudnnSupport::DeriveOutputBatchDescriptor(
const dnn::FilterDescriptor& filter_descriptor,
const dnn::ConvolutionDescriptor& convolution_descriptor,
dnn::BatchDescriptor* output_batch_descriptor) {
- ScopedTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT);
- ScopedFilterDescriptor filter(filter_descriptor, CUDNN_DATA_FLOAT);
- ScopedConvolutionDescriptor conv(convolution_descriptor, CUDNN_DATA_FLOAT);
+ CudnnTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT);
+ CudnnFilterDescriptor filter(filter_descriptor, CUDNN_DATA_FLOAT);
+ CudnnConvolutionDescriptor conv(convolution_descriptor, CUDNN_DATA_FLOAT);
int dn = batch_descriptor.ndims() + 2;
std::vector<int> dims(dn); // in BDYX