diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 250 |
1 files changed, 122 insertions, 128 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index af78efe81d..7ace7fd303 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -53,13 +53,6 @@ PLUGIN_REGISTRY_DEFINE_PLUGIN_ID(kCuDnnPlugin); namespace { -// TODO(csigg): remove dnn namespace qualifier from the RNN code below. -using ::stream_executor::dnn::BatchDescriptor; -using ::stream_executor::dnn::ConvolutionDescriptor; -using ::stream_executor::dnn::FilterDescriptor; -using ::stream_executor::dnn::NormalizeDescriptor; -using ::stream_executor::dnn::PoolingDescriptor; - // Converts (via narrowing) a type T value to a type U, and checks that the // value has no value change due to the conversion. template <typename WideT, typename NarrowT> @@ -390,7 +383,7 @@ namespace { // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope. class ScopedTensorDescriptor { public: - ScopedTensorDescriptor(const BatchDescriptor& batch_descriptor, + ScopedTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor, cudnnDataType_t elem_type) : handle_(nullptr) { cudnnStatus_t status = cudnnCreateTensorDescriptor(&handle_); @@ -464,7 +457,7 @@ class ScopedTensorDescriptor { // Turns a FilterDescriptor structure into a cudnn filter handle within a scope. class ScopedFilterDescriptor { public: - ScopedFilterDescriptor(const FilterDescriptor& filter_descriptor, + ScopedFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor, cudnnDataType_t elem_type) : handle_(nullptr) { cudnnStatus_t status = cudnnCreateFilterDescriptor(&handle_); @@ -577,7 +570,7 @@ static bool BatchnormSpatialPersistentEnabled() { class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor( - const ConvolutionDescriptor& convolution_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, cudnnDataType_t data_type) : handle_(nullptr) { cudnnStatus_t status = cudnnCreateConvolutionDescriptor(&handle_); @@ -671,7 +664,8 @@ class ScopedConvolutionDescriptor { // within a scope. class ScopedPoolingDescriptor { public: - explicit ScopedPoolingDescriptor(const PoolingDescriptor& pooling_descriptor) + explicit ScopedPoolingDescriptor( + const dnn::PoolingDescriptor& pooling_descriptor) : handle_(nullptr) { cudnnStatus_t status = cudnnCreatePoolingDescriptor(&handle_); if (status != CUDNN_STATUS_SUCCESS) { @@ -727,7 +721,7 @@ class ScopedPoolingDescriptor { class ScopedNormalizeDescriptor { public: explicit ScopedNormalizeDescriptor( - const NormalizeDescriptor& normalize_descriptor) + const dnn::NormalizeDescriptor& normalize_descriptor) : handle_(nullptr) { cudnnStatus_t status = cudnnCreateLRNDescriptor(&handle_); if (status != CUDNN_STATUS_SUCCESS) { @@ -1206,16 +1200,16 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor( int dims[] = {1, rnn_desc.input_size(), 1}; int strides[] = {dims[1] * dims[2], dims[2], 1}; status = cudnnSetTensorNdDescriptor( - /*tensorDesc=*/input_desc, rnn_desc.data_type() /*dataType*/, - sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims, + /*tensorDesc=*/input_desc, /*dataType=*/rnn_desc.data_type(), + /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims, /*strideA=*/strides); CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to set tensor descriptor"); size_t params_size = 0; status = cudnnGetRNNParamsSize( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), /*xDesc=*/input_desc, /*sizeInBytes=*/¶ms_size, - rnn_desc.data_type() /*dataType*/); + /*dataType=*/rnn_desc.data_type()); CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to get RNN parameter size"); params_size_in_bytes_ = static_cast<int64>(params_size); } @@ -1226,8 +1220,8 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor( CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to create RNN filter descriptor"); int dims[] = {static_cast<int>(params_size_in_bytes_), 1, 1}; status = cudnnSetFilterNdDescriptor( - /*filterDesc=*/handle_, rnn_desc.data_type() /*dataType*/, - /*format=*/CUDNN_TENSOR_NCHW, sizeof(dims) / sizeof(dims[0]) /*nbDims*/, + /*filterDesc=*/handle_, /*dataType=*/rnn_desc.data_type(), + /*format=*/CUDNN_TENSOR_NCHW, /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*filterDimA=*/dims); CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to update RNN filter descriptor"); } @@ -1247,7 +1241,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor( void* offset = nullptr; if (type == 0) { status = cudnnGetRNNLinLayerMatrixParams( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), /*layer=*/layer, /*xDesc=*/input_desc, /*wDesc=*/handle_, /*w=*/nullptr, /*linLayerID=*/region, /*linLayerMatDesc=*/region_desc_handle, @@ -1256,7 +1250,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor( status, "Cudnn fails to call cudnnGetRNNLinLayerMatrixParams"); } else { status = cudnnGetRNNLinLayerBiasParams( - cudnn.handle() /*rnnDesc*/, rnn_desc.handle() /*rnnDesc*/, + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), /*layer=*/layer, /*xDesc=*/input_desc, /*wDesc=*/handle_, /*w=*/nullptr, /*linLayerID=*/region, /*linLayerBiasDesc=*/region_desc_handle, @@ -1270,7 +1264,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor( int n_dims; status = cudnnGetFilterNdDescriptor( /*filterDesc=*/region_desc_handle, - sizeof(dims) / sizeof(dims[0]) /*nbDimsRequested*/, + /*nbDimsRequested=*/sizeof(dims) / sizeof(dims[0]), /*dataType=*/&data_type, /*format=*/&tensor_format, /*nbDims=*/&n_dims, /*filterDimA=*/dims); CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to get filter description"); @@ -1338,7 +1332,7 @@ class CudnnRnnSequenceTensorDescriptor int strides[] = {dims[1] * dims[2], dims[2], 1}; status = cudnnSetTensorNdDescriptor( /*tensorDesc=*/handle, /*dataType=*/data_type, - sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims, + /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims, /*strideA=*/strides); CUDNN_RETURN_IF_FAIL(status, "Failed to update tensor descriptor"); // Replicate handle across the number of steps. @@ -1390,7 +1384,7 @@ class CudnnRnnStateTensorDescriptor int strides[] = {dims[1] * dims[2], dims[2], 1}; status = cudnnSetTensorNdDescriptor( /*tensorDesc=*/handle_, /*dataType=*/data_type, - sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims, + /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims, /*strideA=*/strides); CUDNN_RETURN_IF_FAIL(status, "Failed to update tensor descriptor"); } @@ -1497,9 +1491,9 @@ bool CheckRNNParameterSize(const CudnnHandle& cudnn, const CudnnRnnSequenceTensorDescriptor& input_desc) { size_t params_size_in_bytes = 0; cudnnStatus_t status = cudnnGetRNNParamsSize( - /*handle=*/cudnn.handle(), rnn_desc.handle() /*rnnDesc*/, - input_desc.handles()[0] /*xDesc*/, /*sizeInBytes=*/¶ms_size_in_bytes, - rnn_desc.data_type() /*dataType*/); + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*xDesc=*/input_desc.handles()[0], /*sizeInBytes=*/¶ms_size_in_bytes, + /*dataType=*/rnn_desc.data_type()); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "Unable to check RNN param size: " << ToString(status); return false; @@ -1592,8 +1586,8 @@ bool CudnnSupport::DoRnnForwardImpl( if (is_training) { size_t reserve_space_size_in_bytes = 0; cudnnStatus_t status = cudnnGetRNNTrainingReserveSize( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, - /*seqLength=*/model_dims.seq_length, input_desc.handles() /*xDesc*/, + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(), /*sizeInBytes=*/&reserve_space_size_in_bytes); if (status != CUDNN_STATUS_SUCCESS) { LOG(ERROR) << "Unable to query reserve space size: " << ToString(status); @@ -1630,30 +1624,30 @@ bool CudnnSupport::DoRnnForwardImpl( cudnnStatus_t status; if (!is_training) { status = cudnnRNNForwardInference( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, - model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, - input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, - input_h_data.opaque() /*hx*/, input_c_desc.handle() /*cxDesc*/, - input_c_data.opaque() /*cx*/, rnn_desc.params_handle() /*wDesc*/, - params.opaque() /*w*/, output_desc.handles() /*yDesc*/, - output_data->opaque() /*y*/, output_h_desc.handle() /*hyDesc*/, - output_h_data->opaque() /*hy*/, output_c_desc.handle() /*cyDesc*/, - output_c_data->opaque() /*cy*/, workspace.opaque() /*workspace*/, - workspace.size() /*workSpaceSizeInBytes*/); + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(), + /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(), + /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(), + /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(), + /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(), + /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(), + /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(), + /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(), + /*workSpaceSizeInBytes=*/workspace.size()); } else { status = cudnnRNNForwardTraining( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, - model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, - input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, - input_h_data.opaque() /*hx*/, input_c_desc.handle() /*cxDesc*/, - input_c_data.opaque() /*cx*/, rnn_desc.params_handle() /*wDesc*/, - params.opaque() /*w*/, output_desc.handles() /*yDesc*/, - output_data->opaque() /*y*/, output_h_desc.handle() /*hyDesc*/, - output_h_data->opaque() /*hy*/, output_c_desc.handle() /*cyDesc*/, - output_c_data->opaque() /*cy*/, workspace.opaque() /*workspace*/, - workspace.size() /*workSpaceSizeInBytes*/, - reserve_space.opaque() /*reserveSpace*/, - reserve_space.size() /*reserveSpaceSizeInBytes*/); + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(), + /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(), + /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(), + /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(), + /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(), + /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(), + /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(), + /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(), + /*workSpaceSizeInBytes=*/workspace.size(), + /*reserveSpace=*/reserve_space.opaque(), + /*reserveSpaceSizeInBytes=*/reserve_space.size()); } if (is_profiling) { if (!timer->Stop(AsCUDAStream(stream))) { @@ -1748,24 +1742,24 @@ bool CudnnSupport::DoRnnBackwardImpl( } // make the backward data call cudnnStatus_t status = cudnnRNNBackwardData( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, - model_dims.seq_length /*seqLength*/, output_desc.handles() /*yDesc*/, - output_data.opaque() /*y*/, output_desc.handles() /*dyDesc*/, - output_backprop_data.opaque() /*dy*/, output_h_desc.handle() /*dhyDesc*/, - output_h_backprop_data.opaque() /*dhy*/, - output_c_desc.handle() /*dcyDesc*/, - output_c_backprop_data.opaque() /*dcy*/, - rnn_desc.params_handle() /*wDesc*/, params.opaque() /*w*/, - input_h_desc.handle() /*hxDesc*/, input_h_data.opaque() /*hx*/, - input_c_desc.handle() /*cxDesc*/, input_c_data.opaque() /*cx*/, - input_desc.handles() /*dxDesc*/, input_backprop_data->opaque() /*dx*/, - input_h_desc.handle() /*dhxDesc*/, - input_h_backprop_data->opaque() /*dhx*/, - input_c_desc.handle() /*dcxDesc*/, - input_c_backprop_data->opaque() /*dcx*/, workspace.opaque() /*workspace*/, - workspace.size() /*workSpaceSizeInBytes*/, - reserve_space_data->opaque() /*reserveSpace*/, - reserve_space_data->size() /*reserveSpaceSizeInBytes*/); + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*seqLength=*/model_dims.seq_length, /*yDesc=*/output_desc.handles(), + /*y=*/output_data.opaque(), /*dyDesc=*/output_desc.handles(), + /*dy=*/output_backprop_data.opaque(), /*dhyDesc=*/output_h_desc.handle(), + /*dhy=*/output_h_backprop_data.opaque(), + /*dcyDesc=*/output_c_desc.handle(), + /*dcy=*/output_c_backprop_data.opaque(), + /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(), + /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(), + /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(), + /*dxDesc=*/input_desc.handles(), /*dx=*/input_backprop_data->opaque(), + /*dhxDesc=*/input_h_desc.handle(), + /*dhx=*/input_h_backprop_data->opaque(), + /*dcxDesc=*/input_c_desc.handle(), + /*dcx=*/input_c_backprop_data->opaque(), /*workspace=*/workspace.opaque(), + /*workSpaceSizeInBytes=*/workspace.size(), + /*reserveSpace=*/reserve_space_data->opaque(), + /*reserveSpaceSizeInBytes=*/reserve_space_data->size()); if (status != CUDNN_STATUS_SUCCESS) { if (is_profiling) { @@ -1780,16 +1774,16 @@ bool CudnnSupport::DoRnnBackwardImpl( stream->ThenMemZero(params_backprop_data, params_backprop_data->size()); // make the backward weight call status = cudnnRNNBackwardWeights( - cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/, - model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/, - input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/, - input_h_data.opaque() /*hx*/, output_desc.handles() /*yDesc*/, - output_data.opaque() /*y*/, workspace.opaque() /*workspace*/, - workspace.size() /*workSpaceSizeInBytes*/, - rnn_desc.params_handle() /*dwDesc*/, - params_backprop_data->opaque() /*dw*/, - reserve_space_data->opaque() /*reserveSpace*/, - reserve_space_data->size() /*reserveSpaceSizeInBytes*/); + /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(), + /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(), + /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(), + /*hx=*/input_h_data.opaque(), /*yDesc=*/output_desc.handles(), + /*y=*/output_data.opaque(), /*workspace=*/workspace.opaque(), + /*workSpaceSizeInBytes=*/workspace.size(), + /*dwDesc=*/rnn_desc.params_handle(), + /*dw=*/params_backprop_data->opaque(), + /*reserveSpace=*/reserve_space_data->opaque(), + /*reserveSpaceSizeInBytes=*/reserve_space_data->size()); if (status != CUDNN_STATUS_SUCCESS) { if (is_profiling) { timer->Stop(AsCUDAStream(stream)); @@ -2415,12 +2409,12 @@ cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) { template <class T> bool CudnnSupport::DoConvolveImpl( - Stream* stream, const BatchDescriptor& input_descriptor, + Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory<T>& input_data, - const FilterDescriptor& filter_descriptor, + const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<T>& filter_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { @@ -3038,13 +3032,13 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl( } bool CudnnSupport::DoConvolve( - Stream* stream, const BatchDescriptor& batch_descriptor, + Stream* stream, const dnn::BatchDescriptor& batch_descriptor, const DeviceMemory<float>& input_data, - const FilterDescriptor& filter_descriptor, + const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<float>& filter_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data, - ScratchAllocator* scratch_allocator, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return DoConvolveImpl<float>( @@ -3054,13 +3048,13 @@ bool CudnnSupport::DoConvolve( } bool CudnnSupport::DoConvolve( - Stream* stream, const BatchDescriptor& batch_descriptor, + Stream* stream, const dnn::BatchDescriptor& batch_descriptor, const DeviceMemory<double>& input_data, - const FilterDescriptor& filter_descriptor, + const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<double>& filter_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& output_descriptor, DeviceMemory<double>* output_data, - ScratchAllocator* scratch_allocator, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return DoConvolveImpl<double>( @@ -3070,12 +3064,12 @@ bool CudnnSupport::DoConvolve( } bool CudnnSupport::DoConvolve( - Stream* stream, const BatchDescriptor& batch_descriptor, + Stream* stream, const dnn::BatchDescriptor& batch_descriptor, const DeviceMemory<Eigen::half>& input_data, - const FilterDescriptor& filter_descriptor, + const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<Eigen::half>& filter_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& output_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { @@ -3202,7 +3196,8 @@ namespace { template <class T> DeviceMemory<T> MaybeTransformLayout( Stream* stream, const CudnnHandle& cudnn, - BatchDescriptor* output_descriptor, DeviceMemory<T> backward_output_data, + dnn::BatchDescriptor* output_descriptor, + DeviceMemory<T> backward_output_data, std::unique_ptr<TemporaryDeviceMemory<T>>* transform_scratch) { if (output_descriptor->layout() == dnn::DataLayout::kBatchDepthYX) { return backward_output_data; @@ -3211,7 +3206,7 @@ DeviceMemory<T> MaybeTransformLayout( *transform_scratch = stream->AllocateTemporaryArray<T>(backward_output_data.ElementCount()) .ConsumeValueOrDie(); - BatchDescriptor transformed_output_descriptor; + dnn::BatchDescriptor transformed_output_descriptor; transformed_output_descriptor.CloneFrom(*output_descriptor); transformed_output_descriptor.set_layout(dnn::DataLayout::kBatchDepthYX); cudnnDataType_t cudnn_type = GetCudnnDataType<T>(); @@ -3263,12 +3258,12 @@ bool CudnnSupport::DoTransformTensor(Stream* stream, template <class T> bool CudnnSupport::DoConvolveBackwardDataImpl( - Stream* stream, const FilterDescriptor& filter_descriptor, + Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<T>& filter_data, - const BatchDescriptor& output_descriptor_in, + const dnn::BatchDescriptor& output_descriptor_in, DeviceMemory<T> backward_output_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& input_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& input_descriptor, DeviceMemory<T>* backward_input_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { @@ -3287,7 +3282,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. - BatchDescriptor output_descriptor; + dnn::BatchDescriptor output_descriptor; output_descriptor.CloneFrom(output_descriptor_in); std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch; backward_output_data = @@ -3475,12 +3470,12 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( } bool CudnnSupport::DoConvolveBackwardData( - Stream* stream, const FilterDescriptor& filter_descriptor, + Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<double>& filter_data, - const BatchDescriptor& output_descriptor, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory<double> backward_output_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& input_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& input_descriptor, DeviceMemory<double>* backward_input_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, @@ -3493,12 +3488,12 @@ bool CudnnSupport::DoConvolveBackwardData( } bool CudnnSupport::DoConvolveBackwardData( - Stream* stream, const FilterDescriptor& filter_descriptor, + Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<float>& filter_data, - const BatchDescriptor& output_descriptor, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory<float> backward_output_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& input_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& input_descriptor, DeviceMemory<float>* backward_input_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, @@ -3511,12 +3506,12 @@ bool CudnnSupport::DoConvolveBackwardData( } bool CudnnSupport::DoConvolveBackwardData( - Stream* stream, const FilterDescriptor& filter_descriptor, + Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<Eigen::half>& filter_data, - const BatchDescriptor& output_descriptor, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory<Eigen::half> backward_output_data, - const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& input_descriptor, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& input_descriptor, DeviceMemory<Eigen::half>* backward_input_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, @@ -3554,7 +3549,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. - BatchDescriptor output_descriptor; + dnn::BatchDescriptor output_descriptor; output_descriptor.CloneFrom(output_descriptor_in); std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch; backward_output_data = @@ -3826,27 +3821,27 @@ bool CudnnSupport::DoConvolveBackwardBiasImpl( } bool CudnnSupport::DoConvolveBackwardBias( - Stream* stream, const BatchDescriptor& input_descriptor, + Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory<double>& input_data, - const BatchDescriptor& bias_descriptor, + const dnn::BatchDescriptor& bias_descriptor, DeviceMemory<double>* backward_bias_data) { return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data, bias_descriptor, backward_bias_data); } bool CudnnSupport::DoConvolveBackwardBias( - Stream* stream, const BatchDescriptor& input_descriptor, + Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory<float>& input_data, - const BatchDescriptor& bias_descriptor, + const dnn::BatchDescriptor& bias_descriptor, DeviceMemory<float>* backward_bias_data) { return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data, bias_descriptor, backward_bias_data); } bool CudnnSupport::DoConvolveBackwardBias( - Stream* stream, const BatchDescriptor& input_descriptor, + Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory<Eigen::half>& input_data, - const BatchDescriptor& bias_descriptor, + const dnn::BatchDescriptor& bias_descriptor, DeviceMemory<Eigen::half>* backward_bias_data) { return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data, bias_descriptor, backward_bias_data); @@ -3994,7 +3989,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream, DeviceMemory<float>* output_data) { ScopedTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT); - BatchDescriptor bias_dimensions; + dnn::BatchDescriptor bias_dimensions; bias_dimensions.set_count(1) .set_feature_map_count(dimensions.feature_map_count()) .set_height(1) @@ -4453,8 +4448,8 @@ bool CudnnSupport::DoMemcpyH2DQuantized( } bool CudnnSupport::DeriveOutputBatchDescriptor( - const BatchDescriptor& batch_descriptor, - const FilterDescriptor& filter_descriptor, + const dnn::BatchDescriptor& batch_descriptor, + const dnn::FilterDescriptor& filter_descriptor, const dnn::ConvolutionDescriptor& convolution_descriptor, dnn::BatchDescriptor* output_batch_descriptor) { ScopedTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT); @@ -4493,9 +4488,8 @@ void initialize_cudnn() { cuda::CUDAExecutor* cuda_executor = dynamic_cast<cuda::CUDAExecutor*>(parent); if (cuda_executor == nullptr) { - LOG(ERROR) - << "Attempting to initialize an instance of the cuBLAS " - << "support library with a non-CUDA StreamExecutor"; + LOG(ERROR) << "Attempting to initialize an instance of the cuDNN " + << "support library with a non-CUDA StreamExecutor"; return nullptr; } |