diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-06-12 05:15:55 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-06-12 05:18:34 -0700 |
commit | 7076ae10ed39d7e1870595347e11f3a99b9410d0 (patch) | |
tree | f1c7cbcb194156c3d1534a8961651367ff64992b /tensorflow/stream_executor/cuda | |
parent | 1f1e88a681d5d6dea966033acf9b7e235913a35f (diff) |
Unify cuDNN descriptor wrapper names.
No functional changes.
PiperOrigin-RevId: 200199956
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 255 |
1 file changed, 124 insertions(+), 131 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 48afc06e32..d4f2fd2625 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -495,10 +495,10 @@ PersistentRnnPlan CreatePersistentRnnPlan(cudnnRNNDescriptor_t rnn_desc, // Turns a BatchDescriptor structure into a cudnn tensor handle within a // scope. -class ScopedTensorDescriptor { +class CudnnTensorDescriptor { public: - ScopedTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor, - cudnnDataType_t elem_type) + CudnnTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor, + cudnnDataType_t elem_type) : handle_(CreateTensorDescriptor()) { switch (batch_descriptor.layout()) { case dnn::DataLayout::kBatchYXDepth: @@ -540,15 +540,15 @@ class ScopedTensorDescriptor { private: TensorDescriptor handle_; - SE_DISALLOW_COPY_AND_ASSIGN(ScopedTensorDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnTensorDescriptor); }; // Turns a FilterDescriptor structure into a cudnn filter handle within a // scope. -class ScopedFilterDescriptor { +class CudnnFilterDescriptor { public: - ScopedFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor, - cudnnDataType_t elem_type) + CudnnFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor, + cudnnDataType_t elem_type) : handle_(CreateFilterDescriptor()) { // TODO(b/23032134): Even if the filter layout is not supported, // cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because @@ -586,7 +586,7 @@ class ScopedFilterDescriptor { private: FilterDescriptor handle_; // Owned. - SE_DISALLOW_COPY_AND_ASSIGN(ScopedFilterDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnFilterDescriptor); }; // A helper function to decide whether to enable the TENSOR_OP_MATH math type @@ -636,9 +636,9 @@ bool BatchnormSpatialPersistentEnabled() { // Turns a ConvolutionDescriptor structure into a cudnn convolution handle // within a scope. 
-class ScopedConvolutionDescriptor { +class CudnnConvolutionDescriptor { public: - ScopedConvolutionDescriptor( + CudnnConvolutionDescriptor( const dnn::ConvolutionDescriptor& convolution_descriptor, cudnnDataType_t data_type) : handle_(CreateConvolutionDescriptor()) { @@ -700,14 +700,14 @@ class ScopedConvolutionDescriptor { private: ConvolutionDescriptor handle_; // Owned. - SE_DISALLOW_COPY_AND_ASSIGN(ScopedConvolutionDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnConvolutionDescriptor); }; // Turns a PoolingDescriptor structure into a cudnn pooling descriptor handle // within a scope. -class ScopedPoolingDescriptor { +class CudnnPoolingDescriptor { public: - explicit ScopedPoolingDescriptor( + explicit CudnnPoolingDescriptor( const dnn::PoolingDescriptor& pooling_descriptor) : handle_(CreatePoolingDescriptor()) { const std::vector<int64> strides64 = pooling_descriptor.strides(); @@ -739,13 +739,13 @@ class ScopedPoolingDescriptor { private: PoolingDescriptor handle_; // Owned. - SE_DISALLOW_COPY_AND_ASSIGN(ScopedPoolingDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnPoolingDescriptor); }; // Turns a NormalizeDescriptor structure into a cudnn LRN descriptor handle. -class ScopedNormalizeDescriptor { +class CudnnNormalizeDescriptor { public: - explicit ScopedNormalizeDescriptor( + explicit CudnnNormalizeDescriptor( const dnn::NormalizeDescriptor& normalize_descriptor) : handle_(CreateLrnDescriptor()) { // The range specifies that the indices in the closed range @@ -777,16 +777,16 @@ class ScopedNormalizeDescriptor { private: LrnDescriptor handle_; // Owned. - SE_DISALLOW_COPY_AND_ASSIGN(ScopedNormalizeDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnNormalizeDescriptor); }; // Turns a ActivationDescriptor structure into a cudnn activation // descriptor handle within a scope. 
-class ScopedActivationDescriptor { +class CudnnActivationDescriptor { public: - ScopedActivationDescriptor(dnn::ActivationMode activation_mode, - cudnnNanPropagation_t nan_propagation, - double value_max) + CudnnActivationDescriptor(dnn::ActivationMode activation_mode, + cudnnNanPropagation_t nan_propagation, + double value_max) : handle_(CreateActivationDescriptor()) { double relu_ceiling = 0.0; cudnnActivationMode_t mode; @@ -822,7 +822,7 @@ class ScopedActivationDescriptor { private: ActivationDescriptor handle_; // Owned. - SE_DISALLOW_COPY_AND_ASSIGN(ScopedActivationDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnActivationDescriptor); }; cudnnDataType_t ToCudnnDataType( @@ -888,21 +888,21 @@ int CudnnDataTypeToByteSize(cudnnDataType_t data_type) { } } -class ScopedDropoutDescriptor { - explicit ScopedDropoutDescriptor(DropoutDescriptor handle) +class CudnnDropoutDescriptor { + explicit CudnnDropoutDescriptor(DropoutDescriptor handle) : handle_(std::move(handle)) {} public: - ScopedDropoutDescriptor(ScopedDropoutDescriptor&&) = default; + CudnnDropoutDescriptor(CudnnDropoutDescriptor&&) = default; - static port::StatusOr<ScopedDropoutDescriptor> Create( + static port::StatusOr<CudnnDropoutDescriptor> Create( const CudnnHandle& cudnn, float dropout, uint64 seed, ScratchAllocator* state_allocator) { DropoutDescriptor handle = CreateDropoutDescriptor(); if (dropout == 0.0f) { // Return 'empty' dropout descriptor. - return ScopedDropoutDescriptor(std::move(handle)); + return CudnnDropoutDescriptor(std::move(handle)); } DeviceMemory<uint8> state_memory; @@ -917,14 +917,14 @@ class ScopedDropoutDescriptor { handle.get(), cudnn.handle(), dropout, state_memory.opaque(), state_memory.size(), seed)); - return ScopedDropoutDescriptor(std::move(handle)); + return CudnnDropoutDescriptor(std::move(handle)); } cudnnDropoutDescriptor_t handle() const { return handle_.get(); } private: DropoutDescriptor handle_; // Owned. 
- SE_DISALLOW_COPY_AND_ASSIGN(ScopedDropoutDescriptor); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnDropoutDescriptor); }; class CudnnRnnParamsDescriptor { @@ -973,7 +973,7 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor { cudnnRNNMode_t rnn_mode, cudnnDataType_t data_type, cudnnDataType_t compute_type, const dnn::AlgorithmConfig& algorithm_config, - ScopedDropoutDescriptor dropout_desc, + CudnnDropoutDescriptor dropout_desc, CudnnRnnParamsDescriptor params_desc) : rnn_desc_(std::move(rnn_desc)), rnn_plan_(std::move(rnn_plan)), @@ -1002,8 +1002,8 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor { const dnn::AlgorithmConfig& algorithm_config, float dropout, uint64 seed, ScratchAllocator* state_allocator) { SE_ASSIGN_OR_RETURN( - ScopedDropoutDescriptor dropout_desc, - ScopedDropoutDescriptor::Create(cudnn, dropout, seed, state_allocator)); + CudnnDropoutDescriptor dropout_desc, + CudnnDropoutDescriptor::Create(cudnn, dropout, seed, state_allocator)); cuda::RnnDescriptor rnn_desc = CreateRnnDescriptor(); cudnnRNNAlgo_t rnn_algo = ToCudnnRNNAlgo(algorithm_config.algorithm()); @@ -1097,7 +1097,7 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor { cudnnDataType_t data_type_; cudnnDataType_t compute_type_; dnn::AlgorithmConfig algorithm_config_; - ScopedDropoutDescriptor dropout_desc_; + CudnnDropoutDescriptor dropout_desc_; CudnnRnnParamsDescriptor params_desc_; SE_DISALLOW_COPY_AND_ASSIGN(CudnnRnnDescriptor); }; @@ -1926,10 +1926,9 @@ namespace { // and backward filter. 
port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo( - const CudnnHandle& cudnn, const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, bool specify_workspace_limit, + const CudnnHandle& cudnn, const CudnnTensorDescriptor& input_nd, + const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit, size_t memory_limit_bytes) { cudnnConvolutionFwdPreference_t preference = specify_workspace_limit ? CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT @@ -1943,10 +1942,10 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo( port::StatusOr<cudnnConvolutionBwdDataAlgo_t> GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, + const CudnnTensorDescriptor& input_nd, + const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit, size_t memory_limit_bytes) { cudnnConvolutionBwdDataPreference_t preference = @@ -1962,10 +1961,10 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn, port::StatusOr<cudnnConvolutionBwdFilterAlgo_t> GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, + const CudnnTensorDescriptor& input_nd, + const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit, size_t memory_limit_bytes) { cudnnConvolutionBwdFilterPreference_t preference = @@ -1982,10 +1981,9 @@ 
GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn, port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmDesc& algorithm_desc, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator) { // TODO(csigg): This has side effects on the convolution descriptor. It is // functionally correct because the convolution is run with the algorithm of @@ -2025,10 +2023,9 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionBackwardDataWorkspace( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmDesc& algorithm_desc, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator) { // TODO(csigg): This has side effects on the convolution descriptor. 
It is // functionally correct because the convolution is run with the algorithm of @@ -2070,10 +2067,9 @@ port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionBackwardFilterWorkspace( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmDesc& algorithm_desc, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator) { // TODO(csigg): This has side effects on the convolution descriptor. It is // functionally correct because the convolution is run with the algorithm of @@ -2114,11 +2110,10 @@ AllocateCudnnConvolutionBackwardFilterWorkspace( port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmConfig& algorithm_config, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, - ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) { + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator, + DeviceMemory<uint8>* scratch) { dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm(); if (algorithm_config.algorithm().is_default()) { // Pick fastest algorithm within memory limit according to cuDNN's @@ -2164,11 +2159,10 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm( port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmConfig& algorithm_config, - const ScopedTensorDescriptor& input_nd, - const 
ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, - ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) { + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator, + DeviceMemory<uint8>* scratch) { dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm(); if (algorithm_config.algorithm().is_default()) { // Pick fastest algorithm within memory limit according to cuDNN's @@ -2214,11 +2208,10 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm( port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm( Stream* stream, const CudnnHandle& cudnn, const dnn::AlgorithmConfig& algorithm_config, - const ScopedTensorDescriptor& input_nd, - const ScopedFilterDescriptor& filter, - const ScopedConvolutionDescriptor& conv, - const ScopedTensorDescriptor& output_nd, - ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch) { + const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, + const CudnnConvolutionDescriptor& conv, + const CudnnTensorDescriptor& output_nd, ScratchAllocator* scratch_allocator, + DeviceMemory<uint8>* scratch) { dnn::AlgorithmDesc algo_desc = algorithm_config.algorithm(); if (algorithm_config.algorithm().is_default()) { // Pick fastest algorithm within memory limit according to cuDNN's @@ -2387,11 +2380,11 @@ port::Status CudnnSupport::DoConvolveImpl( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { cudnnDataType_t cudnn_type = GetCudnnDataType<T>(); - ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type); - ScopedTensorDescriptor output_nd(output_descriptor, cudnn_type); - ScopedFilterDescriptor filter(filter_descriptor, cudnn_type); - ScopedConvolutionDescriptor conv(convolution_descriptor, - 
GetConvComputeType<T>()); + CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type); + CudnnTensorDescriptor output_nd(output_descriptor, cudnn_type); + CudnnFilterDescriptor filter(filter_descriptor, cudnn_type); + CudnnConvolutionDescriptor conv(convolution_descriptor, + GetConvComputeType<T>()); auto cudnn = cudnn_->GetHandle(parent_, stream); // Alpha is the scaling factor for input. @@ -2493,14 +2486,14 @@ port::Status CudnnSupport::DoFusedConvolveImpl( "Relu activation."); } - ScopedTensorDescriptor conv_input_nd( + CudnnTensorDescriptor conv_input_nd( conv_input_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type)); - ScopedTensorDescriptor output_nd( + CudnnTensorDescriptor output_nd( output_descriptor, static_cast<cudnnDataType_t>(cudnn_data_type)); - ScopedFilterDescriptor filter(filter_descriptor, - static_cast<cudnnDataType_t>(cudnn_data_type)); - ScopedTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT); - ScopedConvolutionDescriptor conv( + CudnnFilterDescriptor filter(filter_descriptor, + static_cast<cudnnDataType_t>(cudnn_data_type)); + CudnnTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT); + CudnnConvolutionDescriptor conv( convolution_descriptor, static_cast<cudnnDataType_t>(cudnn_compute_type)); auto cudnn = cudnn_->GetHandle(parent_, stream); @@ -2528,7 +2521,7 @@ port::Status CudnnSupport::DoFusedConvolveImpl( // activation descriptor. Note that this will change the nan propagation // behavior from separate conv, bias, and relu (which by default is // CUDNN_PROPAGATE_NAN. - ScopedActivationDescriptor activation_desc( + CudnnActivationDescriptor activation_desc( activation_mode, CUDNN_NOT_PROPAGATE_NAN, output_descriptor.value_max()); auto side_input_data_ptr = (side_input_scale == 0) ? 
output_data->opaque() : side_input_data.opaque(); @@ -2740,8 +2733,8 @@ port::Status CudnnSupport::DoBatchNormalizationForwardImpl( DeviceMemory<U>* saved_mean, DeviceMemory<U>* saved_inv_var, bool is_training, std::function<const DeviceMemory<U>&()> var_to_inv_var, std::function<void()> inv_var_to_var) { - ScopedTensorDescriptor x_descriptor(x_desc, ToCudnnDataType(input_data_type)); - ScopedTensorDescriptor scale_offset_descriptor( + CudnnTensorDescriptor x_descriptor(x_desc, ToCudnnDataType(input_data_type)); + CudnnTensorDescriptor scale_offset_descriptor( scale_offset_desc, ToCudnnDataType(scale_data_type)); cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; #if CUDNN_VERSION >= 7000 @@ -2825,9 +2818,9 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl( const dnn::BatchDescriptor& scale_offset_desc, const double epsilon, DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop, DeviceMemory<U>* offset_backprop) { - ScopedTensorDescriptor x_descriptor( + CudnnTensorDescriptor x_descriptor( x_desc, static_cast<cudnnDataType_t>(cudnn_input_type)); - ScopedTensorDescriptor scale_offset_descriptor( + CudnnTensorDescriptor scale_offset_descriptor( scale_offset_desc, static_cast<cudnnDataType_t>(cudnn_scale_type)); cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; #if CUDNN_VERSION >= 7000 @@ -3017,9 +3010,9 @@ bool CudnnSupport::DoTransformTensor(Stream* stream, dnn::DataType output_type, float scale, DeviceMemoryBase* output_data) { float beta = 0.0f; - ScopedTensorDescriptor input_tensor_desc( + CudnnTensorDescriptor input_tensor_desc( input_desc, ToCudnnDataType(input_type, input_desc.layout())); - ScopedTensorDescriptor output_tensor_desc( + CudnnTensorDescriptor output_tensor_desc( output_desc, ToCudnnDataType(output_type, output_desc.layout())); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3056,11 +3049,11 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); 
- ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type); - ScopedTensorDescriptor in_back_nd(input_descriptor, cudnn_type); - ScopedFilterDescriptor filter(filter_descriptor, cudnn_type); - ScopedConvolutionDescriptor conv(convolution_descriptor, - GetConvComputeType<T>()); + CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type); + CudnnTensorDescriptor in_back_nd(input_descriptor, cudnn_type); + CudnnFilterDescriptor filter(filter_descriptor, cudnn_type); + CudnnConvolutionDescriptor conv(convolution_descriptor, + GetConvComputeType<T>()); const bool is_profiling = output_profile_result != nullptr; @@ -3192,11 +3185,11 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); - ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type); - ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type); - ScopedFilterDescriptor filter(filter_descriptor, cudnn_type); - ScopedConvolutionDescriptor conv(convolution_descriptor, - GetConvComputeType<T>()); + CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type); + CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type); + CudnnFilterDescriptor filter(filter_descriptor, cudnn_type); + CudnnConvolutionDescriptor conv(convolution_descriptor, + GetConvComputeType<T>()); const bool is_profiling = output_profile_result != nullptr; @@ -3338,8 +3331,8 @@ port::Status CudnnSupport::DoConvolveBackwardBiasImpl( const dnn::BatchDescriptor& bias_descriptor, DeviceMemory<T>* backward_bias_data) { cudnnDataType_t cudnn_type = GetCudnnDataType<T>(); - ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type); - ScopedTensorDescriptor bias_nd(bias_descriptor, cudnn_type); + CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type); + CudnnTensorDescriptor bias_nd(bias_descriptor, cudnn_type); // Alpha is the scaling factor for input. 
float alpha = 1.0; @@ -3526,7 +3519,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream, const DeviceMemory<float>& biases, const dnn::BatchDescriptor& dimensions, DeviceMemory<float>* output_data) { - ScopedTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT); dnn::BatchDescriptor bias_dimensions; bias_dimensions.set_count(1) @@ -3534,7 +3527,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream, .set_height(1) .set_width(1) .set_layout(dnn::DataLayout::kBatchYXDepth); - ScopedTensorDescriptor bias_descriptor(bias_dimensions, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor bias_descriptor(bias_dimensions, CUDNN_DATA_FLOAT); // cudnnAddTensor after R3 is in-place, so we need to copy input_data to // output_data before doing the addition, unless the input and @@ -3570,10 +3563,10 @@ bool CudnnSupport::DoActivate(Stream* stream, const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data, uint64 options) { - ScopedActivationDescriptor activation_desc( + CudnnActivationDescriptor activation_desc( activation_mode, CUDNN_PROPAGATE_NAN, dimensions.value_max()); - ScopedTensorDescriptor input_nd(dimensions, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor input_nd(dimensions, CUDNN_DATA_FLOAT); // Alpha is the input scaling factor. float alpha = 1.0; // Beta is the output scaling factor. @@ -3600,9 +3593,9 @@ bool CudnnSupport::DoPoolForward( // Beta is the scaling factor for output. 
double beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3625,9 +3618,9 @@ bool CudnnSupport::DoPoolForward( // Beta is the scaling factor for output. float beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3650,9 +3643,9 @@ bool CudnnSupport::DoPoolForward( // Beta is the scaling factor for output. float beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { RETURN_IF_CUDNN_ERROR(cudnnPoolingForward( @@ -3676,9 +3669,9 @@ bool CudnnSupport::DoPoolBackward( // Beta is the scaling factor for output. 
double beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_DOUBLE); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_DOUBLE); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3705,9 +3698,9 @@ bool CudnnSupport::DoPoolBackward( // Beta is the scaling factor for output. float beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_FLOAT); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3734,9 +3727,9 @@ bool CudnnSupport::DoPoolBackward( // Beta is the scaling factor for output. 
float beta = 0.0; - ScopedTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF); - ScopedTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF); - ScopedPoolingDescriptor pooling_desc(pooling_dimensions); + CudnnTensorDescriptor src_desc(input_dimensions, CUDNN_DATA_HALF); + CudnnTensorDescriptor dest_desc(output_dimensions, CUDNN_DATA_HALF); + CudnnPoolingDescriptor pooling_desc(pooling_dimensions); auto cudnn = cudnn_->GetHandle(parent_, stream); auto status = [&] { @@ -3771,8 +3764,8 @@ bool CudnnSupport::DoNormalizeWithDimensions( return false; } - ScopedTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT); - ScopedNormalizeDescriptor normalize(normalize_descriptor); + CudnnTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT); + CudnnNormalizeDescriptor normalize(normalize_descriptor); // Alpha is the scaling factor for input. float alpha = 1.0f; @@ -3808,8 +3801,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions( return false; } - ScopedTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT); - ScopedNormalizeDescriptor normalize(normalize_descriptor); + CudnnTensorDescriptor dims(dimensions, CUDNN_DATA_FLOAT); + CudnnNormalizeDescriptor normalize(normalize_descriptor); float alpha = 1.0f; float beta = 0.0f; @@ -3932,9 +3925,9 @@ bool CudnnSupport::DeriveOutputBatchDescriptor( const dnn::FilterDescriptor& filter_descriptor, const dnn::ConvolutionDescriptor& convolution_descriptor, dnn::BatchDescriptor* output_batch_descriptor) { - ScopedTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT); - ScopedFilterDescriptor filter(filter_descriptor, CUDNN_DATA_FLOAT); - ScopedConvolutionDescriptor conv(convolution_descriptor, CUDNN_DATA_FLOAT); + CudnnTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT); + CudnnFilterDescriptor filter(filter_descriptor, CUDNN_DATA_FLOAT); + CudnnConvolutionDescriptor conv(convolution_descriptor, CUDNN_DATA_FLOAT); int dn = batch_descriptor.ndims() + 2; std::vector<int> dims(dn); // in BDYX |