about summary refs log tree commit diff homepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
author: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-16 14:20:38 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-16 14:20:48 -0700
commit 703e18752e6616cf6bfec358329bb243f0346935 (patch)
tree 7904f98a03f0bf1a328ac7445699d33cafb82a20 /tensorflow/stream_executor
parent c1322043a853601ec9561157b23a5c86cdadc689 (diff)
parent 456aaa2fdbf821296a31f5493955f4653ae119dd (diff)
Merge pull request #20706 from ROCmSoftwarePlatform:upstream-staging-stream-executor-pooling-interface
PiperOrigin-RevId: 204805678
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.cc | 19
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.h | 21
-rw-r--r-- tensorflow/stream_executor/dnn.h | 21
-rw-r--r-- tensorflow/stream_executor/stream.cc | 63
-rw-r--r-- tensorflow/stream_executor/stream.h | 21
5 files changed, 93 insertions, 52 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 84916385a8..9e24a4538c 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3603,7 +3603,7 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<double>* output_data, ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3628,7 +3628,7 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) {
+ DeviceMemory<float>* output_data, ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3653,7 +3653,8 @@ bool CudnnSupport::DoPoolForward(
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) {
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3679,7 +3680,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) {
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
double alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3708,7 +3710,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) {
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3737,7 +3740,8 @@ bool CudnnSupport::DoPoolBackward(
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) {
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
// Alpha is the scaling factor for input.
float alpha = 1.0;
// Beta is the scaling factor for output.
@@ -3806,7 +3810,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions(
const dnn::BatchDescriptor& dimensions, const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) {
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator) {
// Check for unsupported modes.
if (normalize_descriptor.wrap_around()) {
LOG(ERROR) << "CUDA LRN does not support cudnn-around mode";
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index c924d41cb5..9d88f971bb 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -515,21 +515,24 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) override;
+ DeviceMemory<double>* output_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) override;
+ DeviceMemory<float>* output_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) override;
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -538,7 +541,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) override;
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -547,7 +551,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) override;
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -556,7 +561,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) override;
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator) override;
bool DoNormalize(Stream* stream,
const dnn::NormalizeDescriptor& normalize_descriptor,
@@ -575,7 +581,8 @@ class CudnnSupport : public dnn::DnnSupport {
const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) override;
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator) override;
bool DoDepthConcatenate(
Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 9eca5abe1a..a7449c2df4 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1552,14 +1552,16 @@ class DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<float>* output_data) = 0;
+ DeviceMemory<float>* output_data,
+ ScratchAllocator* workspace_allocator) = 0;
virtual bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<double>* output_data,
+ ScratchAllocator* workspace_allocator) {
LOG(FATAL) << "DoPoolForward not implemented for double.";
return false;
}
@@ -1569,7 +1571,8 @@ class DnnSupport {
const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) {
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* workspace_allocator) {
LOG(FATAL) << "DoPoolForward not implemented for float16.";
return false;
}
@@ -1582,7 +1585,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<double>& output_data,
const DeviceMemory<double>& input_diff_data,
- DeviceMemory<double>* output_diff_data) {
+ DeviceMemory<double>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1594,7 +1598,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) {
+ DeviceMemory<float>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1606,7 +1611,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) {
+ DeviceMemory<Eigen::half>* output_diff_data,
+ ScratchAllocator* workspace_allocator) {
LOG(FATAL) << "DoPoolBackward not implemented.";
return false;
}
@@ -1653,7 +1659,8 @@ class DnnSupport {
const DeviceMemory<float>& raw_data,
const DeviceMemory<float>& normalized_data,
const DeviceMemory<float>& normalized_variable_gradient,
- DeviceMemory<float>* raw_variable_gradient) {
+ DeviceMemory<float>* raw_variable_gradient,
+ ScratchAllocator* workspace_allocator) {
return false;
}
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 9369183133..ca1b8e28e6 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -1377,15 +1377,16 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<double> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<double> *output_data) {
+ DeviceMemory<double> *output_data, ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
- input_data, output_dimensions,
- output_data));
+ input_data, output_dimensions, output_data,
+ workspace_allocator));
} else {
SetError();
LOG(WARNING)
@@ -1401,15 +1402,16 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<float> *output_data) {
+ DeviceMemory<float> *output_data, ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
- input_data, output_dimensions,
- output_data));
+ input_data, output_dimensions, output_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1422,15 +1424,17 @@ Stream &Stream::ThenPoolForward(
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<Eigen::half> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<Eigen::half> *output_data) {
+ DeviceMemory<Eigen::half> *output_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
- PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
+ PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
- input_data, output_dimensions,
- output_data));
+ input_data, output_dimensions, output_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1445,16 +1449,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<double> &output_data,
const DeviceMemory<double> &input_diff_data,
- DeviceMemory<double> *output_diff_data) {
+ DeviceMemory<double> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetError();
LOG(WARNING)
@@ -1472,16 +1479,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,
const DeviceMemory<float> &input_diff_data,
- DeviceMemory<float> *output_diff_data) {
+ DeviceMemory<float> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1496,16 +1506,19 @@ Stream &Stream::ThenPoolBackward(
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<Eigen::half> &output_data,
const DeviceMemory<Eigen::half> &input_diff_data,
- DeviceMemory<Eigen::half> *output_diff_data) {
+ DeviceMemory<Eigen::half> *output_diff_data,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
- PARAM(input_diff_data), PARAM(output_diff_data));
+ PARAM(input_diff_data), PARAM(output_diff_data),
+ PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
input_data, output_dimensions, output_data,
- input_diff_data, output_diff_data));
+ input_diff_data, output_diff_data,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -1552,16 +1565,18 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions(
const dnn::BatchDescriptor &dimensions, const DeviceMemory<float> &raw_data,
const DeviceMemory<float> &normalized_data,
const DeviceMemory<float> &normalized_variable_gradient,
- DeviceMemory<float> *raw_variable_gradient) {
+ DeviceMemory<float> *raw_variable_gradient,
+ ScratchAllocator *workspace_allocator) {
VLOG_CALL(PARAM(normalize_descriptor), PARAM(dimensions), PARAM(raw_data),
PARAM(normalized_data), PARAM(normalized_variable_gradient),
- PARAM(raw_variable_gradient));
+ PARAM(raw_variable_gradient), PARAM(workspace_allocator));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoNormalizeBackwardWithDimensions(
this, normalize_descriptor, dimensions, raw_data, normalized_data,
- normalized_variable_gradient, raw_variable_gradient));
+ normalized_variable_gradient, raw_variable_gradient,
+ workspace_allocator));
} else {
SetErrorAndLogNoDnnSupport();
}
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index e8885e1eb6..63d64947c8 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -629,19 +629,22 @@ class Stream {
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<double> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<double> *output_data);
+ DeviceMemory<double> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<float> *output_data);
+ DeviceMemory<float> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<Eigen::half> &input_data,
const dnn::BatchDescriptor &output_dimensions,
- DeviceMemory<Eigen::half> *output_data);
+ DeviceMemory<Eigen::half> *output_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -649,7 +652,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<double> &output_data,
const DeviceMemory<double> &input_diff_data,
- DeviceMemory<double> *output_diff_data);
+ DeviceMemory<double> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -657,7 +661,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,
const DeviceMemory<float> &input_diff_data,
- DeviceMemory<float> *output_diff_data);
+ DeviceMemory<float> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
@@ -665,7 +670,8 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<Eigen::half> &output_data,
const DeviceMemory<Eigen::half> &input_diff_data,
- DeviceMemory<Eigen::half> *output_diff_data);
+ DeviceMemory<Eigen::half> *output_diff_data,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenNormalize(const dnn::NormalizeDescriptor &normalize_descriptor,
const DeviceMemory<float> &input_data,
@@ -684,7 +690,8 @@ class Stream {
const DeviceMemory<float> &raw_data,
const DeviceMemory<float> &normalized_data,
const DeviceMemory<float> &normalized_variable_gradient,
- DeviceMemory<float> *raw_variable_gradient);
+ DeviceMemory<float> *raw_variable_gradient,
+ ScratchAllocator *workspace_allocator = nullptr);
Stream &ThenActivate(dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &dimensions,