about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
author: Wen-Heng (Jack) Chung <whchung@gmail.com> 2018-07-11 18:35:43 +0000
committer: Wen-Heng (Jack) Chung <whchung@gmail.com> 2018-07-11 20:34:36 +0000
commit: 456aaa2fdbf821296a31f5493955f4653ae119dd (patch)
tree: 68f2f260cb66dd135a9c207006d721910d272e36 /tensorflow/stream_executor
parent: 135e419e780423a888ddd45e479129493336c52b (diff)
[ROCm] Interface changes for pooling APIs in StreamExecutor
Due to the design of MIOpen, the DNN library on the ROCm platform, an instance of ScratchAllocator has to be passed into the pooling routines. This commit makes the corresponding interface changes and updates the CUDA StreamExecutor implementation to match.
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.cc | 21
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.h | 21
-rw-r--r-- tensorflow/stream_executor/dnn.h | 21
-rw-r--r-- tensorflow/stream_executor/stream.cc | 59
-rw-r--r-- tensorflow/stream_executor/stream.h | 21
5 files changed, 94 insertions, 49 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 84916385a8..c12eb1c61f 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3603,7 +3603,8 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<double>* output_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3628,7 +3629,8 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) {
+ DeviceMemory<float>* output_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3653,7 +3655,8 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) {
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3679,7 +3682,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) {
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3708,7 +3712,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) {
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3737,7 +3742,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) {
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3806,7 +3812,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions(
const dnn::BatchDescriptor& dimensions, const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) {
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator) {
// Check for unsupported modes.
if (normalize_descriptor.wrap_around()) {
LOG(ERROR) << "CUDA LRN does not support cudnn-around mode";
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index c924d41cb5..77f9223710 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -515,21 +515,24 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) override;
+ DeviceMemory<double>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) override;
+ DeviceMemory<float>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) override;
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -538,7 +541,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) override;
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -547,7 +551,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) override;
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -556,7 +561,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) override;
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoNormalize(Stream* stream,
const dnn::NormalizeDescriptor& normalize_descriptor,
@@ -575,7 +581,8 @@ class CudnnSupport : public dnn::DnnSupport {
const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) override;
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator = nullptr) override;
bool DoDepthConcatenate(
Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 9eca5abe1a..75705e2b49 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1552,14 +1552,16 @@ class DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) = 0;
+ DeviceMemory<float>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) = 0;
virtual bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<double>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) {
LOG(FATAL) << "DoPoolForward not implemented for double.";
return false;
}
@@ -1569,7 +1571,8 @@ class DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) {
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator = nullptr) {
LOG(FATAL) << "DoPoolForward not implemented for float16.";
return false;
}
@@ -1582,7 +1585,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) {
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1594,7 +1598,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) {
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1606,7 +1611,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) {
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator = nullptr) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1653,7 +1659,8 @@ class DnnSupport {
const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) {
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator = nullptr) {
return false;
}
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 9369183133..3c285a9416 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -1377,15 +1377,17 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<double> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<double> *output_data) {
+ DeviceMemory<double> *output_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions,
- output_data));
+ output_data, workspace_allocator));
} else {
SetError();
LOG(WARNING)
@@ -1401,15 +1403,17 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<float> *output_data) {
+ DeviceMemory<float> *output_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions,
- output_data));
+ output_data, workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1422,15 +1426,17 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<Eigen::half> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<Eigen::half> *output_data) {
+ DeviceMemory<Eigen::half> *output_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions,
- output_data));
+ output_data, workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1445,16 +1451,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<double> &output_data,
const DeviceMemory<double> &input_diff_data,
- DeviceMemory<double> *output_diff_data) {
+ DeviceMemory<double> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetError();
LOG(WARNING)
@@ -1472,16 +1481,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,
const DeviceMemory<float> &input_diff_data,
- DeviceMemory<float> *output_diff_data) {
+ DeviceMemory<float> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1496,16 +1508,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<Eigen::half> &output_data,
const DeviceMemory<Eigen::half> &input_diff_data,
- DeviceMemory<Eigen::half> *output_diff_data) {
+ DeviceMemory<Eigen::half> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1552,16 +1567,18 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions(
const dnn::BatchDescriptor &dimensions, const DeviceMemory<float> &raw_data,
const DeviceMemory<float> &normalized_data,
const DeviceMemory<float> &normalized_variable_gradient,
- DeviceMemory<float> *raw_variable_gradient) {
+ DeviceMemory<float> *raw_variable_gradient,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(normalize_descriptor), PARAM(dimensions), PARAM(raw_data),
PARAM(normalized_data), PARAM(normalized_variable_gradient),
- PARAM(raw_variable_gradient));
+ PARAM(raw_variable_gradient), PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoNormalizeBackwardWithDimensions(
this, normalize_descriptor, dimensions, raw_data, normalized_data,
- normalized_variable_gradient, raw_variable_gradient));
+ normalized_variable_gradient, raw_variable_gradient,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index e8885e1eb6..63d64947c8 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -629,19 +629,22 @@ class Stream {
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<double> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<double> *output_data);
+ DeviceMemory<double> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<float> *output_data);
+ DeviceMemory<float> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<Eigen::half> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<Eigen::half> *output_data);
+ DeviceMemory<Eigen::half> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -649,7 +652,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<double> &output_data,
const DeviceMemory<double> &input_diff_data,
- DeviceMemory<double> *output_diff_data);
+ DeviceMemory<double> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -657,7 +661,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,
const DeviceMemory<float> &input_diff_data,
- DeviceMemory<float> *output_diff_data);
+ DeviceMemory<float> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -665,7 +670,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<Eigen::half> &output_data,
const DeviceMemory<Eigen::half> &input_diff_data,
- DeviceMemory<Eigen::half> *output_diff_data);
+ DeviceMemory<Eigen::half> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenNormalize(const dnn::NormalizeDescriptor &normalize_descriptor,
const DeviceMemory<float> &input_data,
@@ -684,7 +690,8 @@ class Stream {
const DeviceMemory<float> &raw_data,
const DeviceMemory<float> &normalized_data,
const DeviceMemory<float> &normalized_variable_gradient,
- DeviceMemory<float> *raw_variable_gradient);
+ DeviceMemory<float> *raw_variable_gradient,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenActivate(dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &dimensions,