about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/cuda/cuda_dnn.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_dnn.cc')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.cc  250
1 files changed, 122 insertions, 128 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index af78efe81d..7ace7fd303 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -53,13 +53,6 @@ PLUGIN_REGISTRY_DEFINE_PLUGIN_ID(kCuDnnPlugin);
namespace {
-// TODO(csigg): remove dnn namespace qualifier from the RNN code below.
-using ::stream_executor::dnn::BatchDescriptor;
-using ::stream_executor::dnn::ConvolutionDescriptor;
-using ::stream_executor::dnn::FilterDescriptor;
-using ::stream_executor::dnn::NormalizeDescriptor;
-using ::stream_executor::dnn::PoolingDescriptor;
-
// Converts (via narrowing) a type T value to a type U, and checks that the
// value has no value change due to the conversion.
template <typename WideT, typename NarrowT>
@@ -390,7 +383,7 @@ namespace {
// Turns a BatchDescriptor structure into a cudnn tensor handle within a scope.
class ScopedTensorDescriptor {
public:
- ScopedTensorDescriptor(const BatchDescriptor& batch_descriptor,
+ ScopedTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor,
cudnnDataType_t elem_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateTensorDescriptor(&handle_);
@@ -464,7 +457,7 @@ class ScopedTensorDescriptor {
// Turns a FilterDescriptor structure into a cudnn filter handle within a scope.
class ScopedFilterDescriptor {
public:
- ScopedFilterDescriptor(const FilterDescriptor& filter_descriptor,
+ ScopedFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor,
cudnnDataType_t elem_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateFilterDescriptor(&handle_);
@@ -577,7 +570,7 @@ static bool BatchnormSpatialPersistentEnabled() {
class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor(
- const ConvolutionDescriptor& convolution_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
cudnnDataType_t data_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateConvolutionDescriptor(&handle_);
@@ -671,7 +664,8 @@ class ScopedConvolutionDescriptor {
// within a scope.
class ScopedPoolingDescriptor {
public:
- explicit ScopedPoolingDescriptor(const PoolingDescriptor& pooling_descriptor)
+ explicit ScopedPoolingDescriptor(
+ const dnn::PoolingDescriptor& pooling_descriptor)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreatePoolingDescriptor(&handle_);
if (status != CUDNN_STATUS_SUCCESS) {
@@ -727,7 +721,7 @@ class ScopedPoolingDescriptor {
class ScopedNormalizeDescriptor {
public:
explicit ScopedNormalizeDescriptor(
- const NormalizeDescriptor& normalize_descriptor)
+ const dnn::NormalizeDescriptor& normalize_descriptor)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateLRNDescriptor(&handle_);
if (status != CUDNN_STATUS_SUCCESS) {
@@ -1206,16 +1200,16 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor(
int dims[] = {1, rnn_desc.input_size(), 1};
int strides[] = {dims[1] * dims[2], dims[2], 1};
status = cudnnSetTensorNdDescriptor(
- /*tensorDesc=*/input_desc, rnn_desc.data_type() /*dataType*/,
- sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims,
+ /*tensorDesc=*/input_desc, /*dataType=*/rnn_desc.data_type(),
+ /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
/*strideA=*/strides);
CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to set tensor descriptor");
size_t params_size = 0;
status = cudnnGetRNNParamsSize(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
/*xDesc=*/input_desc, /*sizeInBytes=*/&params_size,
- rnn_desc.data_type() /*dataType*/);
+ /*dataType=*/rnn_desc.data_type());
CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to get RNN parameter size");
params_size_in_bytes_ = static_cast<int64>(params_size);
}
@@ -1226,8 +1220,8 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor(
CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to create RNN filter descriptor");
int dims[] = {static_cast<int>(params_size_in_bytes_), 1, 1};
status = cudnnSetFilterNdDescriptor(
- /*filterDesc=*/handle_, rnn_desc.data_type() /*dataType*/,
- /*format=*/CUDNN_TENSOR_NCHW, sizeof(dims) / sizeof(dims[0]) /*nbDims*/,
+ /*filterDesc=*/handle_, /*dataType=*/rnn_desc.data_type(),
+ /*format=*/CUDNN_TENSOR_NCHW, /*nbDims=*/sizeof(dims) / sizeof(dims[0]),
/*filterDimA=*/dims);
CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to update RNN filter descriptor");
}
@@ -1247,7 +1241,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor(
void* offset = nullptr;
if (type == 0) {
status = cudnnGetRNNLinLayerMatrixParams(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
/*layer=*/layer, /*xDesc=*/input_desc, /*wDesc=*/handle_,
/*w=*/nullptr, /*linLayerID=*/region,
/*linLayerMatDesc=*/region_desc_handle,
@@ -1256,7 +1250,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor(
status, "Cudnn fails to call cudnnGetRNNLinLayerMatrixParams");
} else {
status = cudnnGetRNNLinLayerBiasParams(
- cudnn.handle() /*rnnDesc*/, rnn_desc.handle() /*rnnDesc*/,
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
/*layer=*/layer, /*xDesc=*/input_desc, /*wDesc=*/handle_,
/*w=*/nullptr, /*linLayerID=*/region,
/*linLayerBiasDesc=*/region_desc_handle,
@@ -1270,7 +1264,7 @@ CudnnRnnParamsDescriptor::CudnnRnnParamsDescriptor(
int n_dims;
status = cudnnGetFilterNdDescriptor(
/*filterDesc=*/region_desc_handle,
- sizeof(dims) / sizeof(dims[0]) /*nbDimsRequested*/,
+ /*nbDimsRequested=*/sizeof(dims) / sizeof(dims[0]),
/*dataType=*/&data_type, /*format=*/&tensor_format,
/*nbDims=*/&n_dims, /*filterDimA=*/dims);
CUDNN_RETURN_IF_FAIL(status, "Cudnn fails to get filter description");
@@ -1338,7 +1332,7 @@ class CudnnRnnSequenceTensorDescriptor
int strides[] = {dims[1] * dims[2], dims[2], 1};
status = cudnnSetTensorNdDescriptor(
/*tensorDesc=*/handle, /*dataType=*/data_type,
- sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims,
+ /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
/*strideA=*/strides);
CUDNN_RETURN_IF_FAIL(status, "Failed to update tensor descriptor");
// Replicate handle across the number of steps.
@@ -1390,7 +1384,7 @@ class CudnnRnnStateTensorDescriptor
int strides[] = {dims[1] * dims[2], dims[2], 1};
status = cudnnSetTensorNdDescriptor(
/*tensorDesc=*/handle_, /*dataType=*/data_type,
- sizeof(dims) / sizeof(dims[0]) /*nbDims*/, /*dimA=*/dims,
+ /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
/*strideA=*/strides);
CUDNN_RETURN_IF_FAIL(status, "Failed to update tensor descriptor");
}
@@ -1497,9 +1491,9 @@ bool CheckRNNParameterSize(const CudnnHandle& cudnn,
const CudnnRnnSequenceTensorDescriptor& input_desc) {
size_t params_size_in_bytes = 0;
cudnnStatus_t status = cudnnGetRNNParamsSize(
- /*handle=*/cudnn.handle(), rnn_desc.handle() /*rnnDesc*/,
- input_desc.handles()[0] /*xDesc*/, /*sizeInBytes=*/&params_size_in_bytes,
- rnn_desc.data_type() /*dataType*/);
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*xDesc=*/input_desc.handles()[0], /*sizeInBytes=*/&params_size_in_bytes,
+ /*dataType=*/rnn_desc.data_type());
if (status != CUDNN_STATUS_SUCCESS) {
LOG(ERROR) << "Unable to check RNN param size: " << ToString(status);
return false;
@@ -1592,8 +1586,8 @@ bool CudnnSupport::DoRnnForwardImpl(
if (is_training) {
size_t reserve_space_size_in_bytes = 0;
cudnnStatus_t status = cudnnGetRNNTrainingReserveSize(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
- /*seqLength=*/model_dims.seq_length, input_desc.handles() /*xDesc*/,
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
/*sizeInBytes=*/&reserve_space_size_in_bytes);
if (status != CUDNN_STATUS_SUCCESS) {
LOG(ERROR) << "Unable to query reserve space size: " << ToString(status);
@@ -1630,30 +1624,30 @@ bool CudnnSupport::DoRnnForwardImpl(
cudnnStatus_t status;
if (!is_training) {
status = cudnnRNNForwardInference(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
- model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/,
- input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/,
- input_h_data.opaque() /*hx*/, input_c_desc.handle() /*cxDesc*/,
- input_c_data.opaque() /*cx*/, rnn_desc.params_handle() /*wDesc*/,
- params.opaque() /*w*/, output_desc.handles() /*yDesc*/,
- output_data->opaque() /*y*/, output_h_desc.handle() /*hyDesc*/,
- output_h_data->opaque() /*hy*/, output_c_desc.handle() /*cyDesc*/,
- output_c_data->opaque() /*cy*/, workspace.opaque() /*workspace*/,
- workspace.size() /*workSpaceSizeInBytes*/);
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
+ /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+ /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
+ /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
+ /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
+ /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
+ /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
+ /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
+ /*workSpaceSizeInBytes=*/workspace.size());
} else {
status = cudnnRNNForwardTraining(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
- model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/,
- input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/,
- input_h_data.opaque() /*hx*/, input_c_desc.handle() /*cxDesc*/,
- input_c_data.opaque() /*cx*/, rnn_desc.params_handle() /*wDesc*/,
- params.opaque() /*w*/, output_desc.handles() /*yDesc*/,
- output_data->opaque() /*y*/, output_h_desc.handle() /*hyDesc*/,
- output_h_data->opaque() /*hy*/, output_c_desc.handle() /*cyDesc*/,
- output_c_data->opaque() /*cy*/, workspace.opaque() /*workspace*/,
- workspace.size() /*workSpaceSizeInBytes*/,
- reserve_space.opaque() /*reserveSpace*/,
- reserve_space.size() /*reserveSpaceSizeInBytes*/);
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
+ /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+ /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
+ /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
+ /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
+ /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
+ /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
+ /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
+ /*workSpaceSizeInBytes=*/workspace.size(),
+ /*reserveSpace=*/reserve_space.opaque(),
+ /*reserveSpaceSizeInBytes=*/reserve_space.size());
}
if (is_profiling) {
if (!timer->Stop(AsCUDAStream(stream))) {
@@ -1748,24 +1742,24 @@ bool CudnnSupport::DoRnnBackwardImpl(
}
// make the backward data call
cudnnStatus_t status = cudnnRNNBackwardData(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
- model_dims.seq_length /*seqLength*/, output_desc.handles() /*yDesc*/,
- output_data.opaque() /*y*/, output_desc.handles() /*dyDesc*/,
- output_backprop_data.opaque() /*dy*/, output_h_desc.handle() /*dhyDesc*/,
- output_h_backprop_data.opaque() /*dhy*/,
- output_c_desc.handle() /*dcyDesc*/,
- output_c_backprop_data.opaque() /*dcy*/,
- rnn_desc.params_handle() /*wDesc*/, params.opaque() /*w*/,
- input_h_desc.handle() /*hxDesc*/, input_h_data.opaque() /*hx*/,
- input_c_desc.handle() /*cxDesc*/, input_c_data.opaque() /*cx*/,
- input_desc.handles() /*dxDesc*/, input_backprop_data->opaque() /*dx*/,
- input_h_desc.handle() /*dhxDesc*/,
- input_h_backprop_data->opaque() /*dhx*/,
- input_c_desc.handle() /*dcxDesc*/,
- input_c_backprop_data->opaque() /*dcx*/, workspace.opaque() /*workspace*/,
- workspace.size() /*workSpaceSizeInBytes*/,
- reserve_space_data->opaque() /*reserveSpace*/,
- reserve_space_data->size() /*reserveSpaceSizeInBytes*/);
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*seqLength=*/model_dims.seq_length, /*yDesc=*/output_desc.handles(),
+ /*y=*/output_data.opaque(), /*dyDesc=*/output_desc.handles(),
+ /*dy=*/output_backprop_data.opaque(), /*dhyDesc=*/output_h_desc.handle(),
+ /*dhy=*/output_h_backprop_data.opaque(),
+ /*dcyDesc=*/output_c_desc.handle(),
+ /*dcy=*/output_c_backprop_data.opaque(),
+ /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
+ /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+ /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
+ /*dxDesc=*/input_desc.handles(), /*dx=*/input_backprop_data->opaque(),
+ /*dhxDesc=*/input_h_desc.handle(),
+ /*dhx=*/input_h_backprop_data->opaque(),
+ /*dcxDesc=*/input_c_desc.handle(),
+ /*dcx=*/input_c_backprop_data->opaque(), /*workspace=*/workspace.opaque(),
+ /*workSpaceSizeInBytes=*/workspace.size(),
+ /*reserveSpace=*/reserve_space_data->opaque(),
+ /*reserveSpaceSizeInBytes=*/reserve_space_data->size());
if (status != CUDNN_STATUS_SUCCESS) {
if (is_profiling) {
@@ -1780,16 +1774,16 @@ bool CudnnSupport::DoRnnBackwardImpl(
stream->ThenMemZero(params_backprop_data, params_backprop_data->size());
// make the backward weight call
status = cudnnRNNBackwardWeights(
- cudnn.handle() /*handle*/, rnn_desc.handle() /*rnnDesc*/,
- model_dims.seq_length /*seqLength*/, input_desc.handles() /*xDesc*/,
- input_data.opaque() /*x*/, input_h_desc.handle() /*hxDesc*/,
- input_h_data.opaque() /*hx*/, output_desc.handles() /*yDesc*/,
- output_data.opaque() /*y*/, workspace.opaque() /*workspace*/,
- workspace.size() /*workSpaceSizeInBytes*/,
- rnn_desc.params_handle() /*dwDesc*/,
- params_backprop_data->opaque() /*dw*/,
- reserve_space_data->opaque() /*reserveSpace*/,
- reserve_space_data->size() /*reserveSpaceSizeInBytes*/);
+ /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+ /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
+ /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+ /*hx=*/input_h_data.opaque(), /*yDesc=*/output_desc.handles(),
+ /*y=*/output_data.opaque(), /*workspace=*/workspace.opaque(),
+ /*workSpaceSizeInBytes=*/workspace.size(),
+ /*dwDesc=*/rnn_desc.params_handle(),
+ /*dw=*/params_backprop_data->opaque(),
+ /*reserveSpace=*/reserve_space_data->opaque(),
+ /*reserveSpaceSizeInBytes=*/reserve_space_data->size());
if (status != CUDNN_STATUS_SUCCESS) {
if (is_profiling) {
timer->Stop(AsCUDAStream(stream));
@@ -2415,12 +2409,12 @@ cudnnDataType_t GetRnnComputeType(dnn::DataType data_type) {
template <class T>
bool CudnnSupport::DoConvolveImpl(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<T>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
@@ -3038,13 +3032,13 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl(
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<float>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
- ScratchAllocator* scratch_allocator,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<float>(
@@ -3054,13 +3048,13 @@ bool CudnnSupport::DoConvolve(
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<double>* output_data,
- ScratchAllocator* scratch_allocator,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<double>(
@@ -3070,12 +3064,12 @@ bool CudnnSupport::DoConvolve(
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<Eigen::half>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
@@ -3202,7 +3196,8 @@ namespace {
template <class T>
DeviceMemory<T> MaybeTransformLayout(
Stream* stream, const CudnnHandle& cudnn,
- BatchDescriptor* output_descriptor, DeviceMemory<T> backward_output_data,
+ dnn::BatchDescriptor* output_descriptor,
+ DeviceMemory<T> backward_output_data,
std::unique_ptr<TemporaryDeviceMemory<T>>* transform_scratch) {
if (output_descriptor->layout() == dnn::DataLayout::kBatchDepthYX) {
return backward_output_data;
@@ -3211,7 +3206,7 @@ DeviceMemory<T> MaybeTransformLayout(
*transform_scratch =
stream->AllocateTemporaryArray<T>(backward_output_data.ElementCount())
.ConsumeValueOrDie();
- BatchDescriptor transformed_output_descriptor;
+ dnn::BatchDescriptor transformed_output_descriptor;
transformed_output_descriptor.CloneFrom(*output_descriptor);
transformed_output_descriptor.set_layout(dnn::DataLayout::kBatchDepthYX);
cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
@@ -3263,12 +3258,12 @@ bool CudnnSupport::DoTransformTensor(Stream* stream,
template <class T>
bool CudnnSupport::DoConvolveBackwardDataImpl(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
- const BatchDescriptor& output_descriptor_in,
+ const dnn::BatchDescriptor& output_descriptor_in,
DeviceMemory<T> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<T>* backward_input_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
@@ -3287,7 +3282,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl(
auto cudnn = cudnn_->GetHandle(parent_, stream);
// TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass.
- BatchDescriptor output_descriptor;
+ dnn::BatchDescriptor output_descriptor;
output_descriptor.CloneFrom(output_descriptor_in);
std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch;
backward_output_data =
@@ -3475,12 +3470,12 @@ bool CudnnSupport::DoConvolveBackwardDataImpl(
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<double>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
@@ -3493,12 +3488,12 @@ bool CudnnSupport::DoConvolveBackwardData(
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<float> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<float>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
@@ -3511,12 +3506,12 @@ bool CudnnSupport::DoConvolveBackwardData(
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<Eigen::half>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
@@ -3554,7 +3549,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl(
auto cudnn = cudnn_->GetHandle(parent_, stream);
// TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass.
- BatchDescriptor output_descriptor;
+ dnn::BatchDescriptor output_descriptor;
output_descriptor.CloneFrom(output_descriptor_in);
std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch;
backward_output_data =
@@ -3826,27 +3821,27 @@ bool CudnnSupport::DoConvolveBackwardBiasImpl(
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<double>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<double>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<float>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<Eigen::half>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<Eigen::half>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
@@ -3994,7 +3989,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream,
DeviceMemory<float>* output_data) {
ScopedTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT);
- BatchDescriptor bias_dimensions;
+ dnn::BatchDescriptor bias_dimensions;
bias_dimensions.set_count(1)
.set_feature_map_count(dimensions.feature_map_count())
.set_height(1)
@@ -4453,8 +4448,8 @@ bool CudnnSupport::DoMemcpyH2DQuantized(
}
bool CudnnSupport::DeriveOutputBatchDescriptor(
- const BatchDescriptor& batch_descriptor,
- const FilterDescriptor& filter_descriptor,
+ const dnn::BatchDescriptor& batch_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const dnn::ConvolutionDescriptor& convolution_descriptor,
dnn::BatchDescriptor* output_batch_descriptor) {
ScopedTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT);
@@ -4493,9 +4488,8 @@ void initialize_cudnn() {
cuda::CUDAExecutor* cuda_executor =
dynamic_cast<cuda::CUDAExecutor*>(parent);
if (cuda_executor == nullptr) {
- LOG(ERROR)
- << "Attempting to initialize an instance of the cuBLAS "
- << "support library with a non-CUDA StreamExecutor";
+ LOG(ERROR) << "Attempting to initialize an instance of the cuDNN "
+ << "support library with a non-CUDA StreamExecutor";
return nullptr;
}