diff options
author | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-16 14:20:38 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-16 14:20:48 -0700 |
commit | 703e18752e6616cf6bfec358329bb243f0346935 (patch) | |
tree | 7904f98a03f0bf1a328ac7445699d33cafb82a20 /tensorflow/stream_executor | |
parent | c1322043a853601ec9561157b23a5c86cdadc689 (diff) | |
parent | 456aaa2fdbf821296a31f5493955f4653ae119dd (diff) |
Merge pull request #20706 from ROCmSoftwarePlatform:upstream-staging-stream-executor-pooling-interface
PiperOrigin-RevId: 204805678
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 19 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.h | 21 | ||||
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 21 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.cc | 63 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.h | 21 |
5 files changed, 93 insertions, 52 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 84916385a8..9e24a4538c 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -3603,7 +3603,7 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<double>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<double>* output_data) { + DeviceMemory<double>* output_data, ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. double alpha = 1.0; // Beta is the scaling factor for output. @@ -3628,7 +3628,7 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<float>* output_data) { + DeviceMemory<float>* output_data, ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. float alpha = 1.0; // Beta is the scaling factor for output. @@ -3653,7 +3653,8 @@ bool CudnnSupport::DoPoolForward( const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<Eigen::half>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<Eigen::half>* output_data) { + DeviceMemory<Eigen::half>* output_data, + ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. float alpha = 1.0; // Beta is the scaling factor for output. @@ -3679,7 +3680,8 @@ bool CudnnSupport::DoPoolBackward( const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<double>& output_data, const DeviceMemory<double>& input_diff_data, - DeviceMemory<double>* output_diff_data) { + DeviceMemory<double>* output_diff_data, + ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. double alpha = 1.0; // Beta is the scaling factor for output. @@ -3708,7 +3710,8 @@ bool CudnnSupport::DoPoolBackward( const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, const DeviceMemory<float>& input_diff_data, - DeviceMemory<float>* output_diff_data) { + DeviceMemory<float>* output_diff_data, + ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. float alpha = 1.0; // Beta is the scaling factor for output. @@ -3737,7 +3740,8 @@ bool CudnnSupport::DoPoolBackward( const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<Eigen::half>& output_data, const DeviceMemory<Eigen::half>& input_diff_data, - DeviceMemory<Eigen::half>* output_diff_data) { + DeviceMemory<Eigen::half>* output_diff_data, + ScratchAllocator* workspace_allocator) { // Alpha is the scaling factor for input. float alpha = 1.0; // Beta is the scaling factor for output. @@ -3806,7 +3810,8 @@ bool CudnnSupport::DoNormalizeBackwardWithDimensions( const dnn::BatchDescriptor& dimensions, const DeviceMemory<float>& raw_data, const DeviceMemory<float>& normalized_data, const DeviceMemory<float>& normalized_variable_gradient, - DeviceMemory<float>* raw_variable_gradient) { + DeviceMemory<float>* raw_variable_gradient, + ScratchAllocator* workspace_allocator) { // Check for unsupported modes. if (normalize_descriptor.wrap_around()) { LOG(ERROR) << "CUDA LRN does not support cudnn-around mode"; diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index c924d41cb5..9d88f971bb 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -515,21 +515,24 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<double>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<double>* output_data) override; + DeviceMemory<double>* output_data, + ScratchAllocator* workspace_allocator) override; bool DoPoolForward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<float>* output_data) override; + DeviceMemory<float>* output_data, + ScratchAllocator* workspace_allocator) override; bool DoPoolForward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<Eigen::half>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<Eigen::half>* output_data) override; + DeviceMemory<Eigen::half>* output_data, + ScratchAllocator* workspace_allocator) override; bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, @@ -538,7 +541,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<double>& output_data, const DeviceMemory<double>& input_diff_data, - DeviceMemory<double>* output_diff_data) override; + DeviceMemory<double>* output_diff_data, + ScratchAllocator* workspace_allocator) override; bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, @@ -547,7 +551,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, const DeviceMemory<float>& input_diff_data, - DeviceMemory<float>* output_diff_data) override; + DeviceMemory<float>* output_diff_data, + ScratchAllocator* workspace_allocator) override; bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, @@ -556,7 +561,8 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<Eigen::half>& output_data, const DeviceMemory<Eigen::half>& input_diff_data, - DeviceMemory<Eigen::half>* output_diff_data) override; + DeviceMemory<Eigen::half>* output_diff_data, + ScratchAllocator* workspace_allocator) override; bool DoNormalize(Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor, @@ -575,7 +581,8 @@ class CudnnSupport : public dnn::DnnSupport { const DeviceMemory<float>& raw_data, const DeviceMemory<float>& normalized_data, const DeviceMemory<float>& normalized_variable_gradient, - DeviceMemory<float>* raw_variable_gradient) override; + DeviceMemory<float>* raw_variable_gradient, + ScratchAllocator* workspace_allocator) override; bool DoDepthConcatenate( Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions, diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 9eca5abe1a..a7449c2df4 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -1552,14 +1552,16 @@ class DnnSupport { const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<float>* output_data) = 0; + DeviceMemory<float>* output_data, + ScratchAllocator* workspace_allocator) = 0; virtual bool DoPoolForward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<double>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<double>* output_data) { + DeviceMemory<double>* output_data, + ScratchAllocator* workspace_allocator) { LOG(FATAL) << "DoPoolForward not implemented for double."; return false; } @@ -1569,7 +1571,8 @@ class DnnSupport { const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<Eigen::half>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<Eigen::half>* output_data) { + DeviceMemory<Eigen::half>* output_data, + ScratchAllocator* workspace_allocator) { LOG(FATAL) << "DoPoolForward not implemented for float16."; return false; } @@ -1582,7 +1585,8 @@ class DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<double>& output_data, const DeviceMemory<double>& input_diff_data, - DeviceMemory<double>* output_diff_data) { + DeviceMemory<double>* output_diff_data, + ScratchAllocator* workspace_allocator) { LOG(FATAL) << "DoPoolBackward not implemented."; return false; } @@ -1594,7 +1598,8 @@ class DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, const DeviceMemory<float>& input_diff_data, - DeviceMemory<float>* output_diff_data) { + DeviceMemory<float>* output_diff_data, + ScratchAllocator* workspace_allocator) { LOG(FATAL) << "DoPoolBackward not implemented."; return false; } @@ -1606,7 +1611,8 @@ class DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<Eigen::half>& output_data, const DeviceMemory<Eigen::half>& input_diff_data, - DeviceMemory<Eigen::half>* output_diff_data) { + DeviceMemory<Eigen::half>* output_diff_data, + ScratchAllocator* workspace_allocator) { LOG(FATAL) << "DoPoolBackward not implemented."; return false; } @@ -1653,7 +1659,8 @@ class DnnSupport { const DeviceMemory<float>& raw_data, const DeviceMemory<float>& normalized_data, const DeviceMemory<float>& normalized_variable_gradient, - DeviceMemory<float>* raw_variable_gradient) { + DeviceMemory<float>* raw_variable_gradient, + ScratchAllocator* workspace_allocator) { return false; } diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 9369183133..ca1b8e28e6 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -1377,15 +1377,16 @@ Stream &Stream::ThenPoolForward( const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<double> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<double> *output_data) { + DeviceMemory<double> *output_data, ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), - PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, - output_data)); + input_data, output_dimensions, output_data, + workspace_allocator)); } else { SetError(); LOG(WARNING) @@ -1401,15 +1402,16 @@ Stream &Stream::ThenPoolForward( const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<float> *output_data) { + DeviceMemory<float> *output_data, ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), - PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, - output_data)); + input_data, output_dimensions, output_data, + workspace_allocator)); } else { SetErrorAndLogNoDnnSupport(); } @@ -1422,15 +1424,17 @@ Stream &Stream::ThenPoolForward( const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<Eigen::half> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<Eigen::half> *output_data) { + DeviceMemory<Eigen::half> *output_data, + ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), - PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, - output_data)); + input_data, output_dimensions, output_data, + workspace_allocator)); } else { SetErrorAndLogNoDnnSupport(); } @@ -1445,16 +1449,19 @@ Stream &Stream::ThenPoolBackward( const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<double> &output_data, const DeviceMemory<double> &input_diff_data, - DeviceMemory<double> *output_diff_data) { + DeviceMemory<double> *output_diff_data, + ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), - PARAM(input_diff_data), PARAM(output_diff_data)); + PARAM(input_diff_data), PARAM(output_diff_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, input_data, output_dimensions, output_data, - input_diff_data, output_diff_data)); + input_diff_data, output_diff_data, + workspace_allocator)); } else { SetError(); LOG(WARNING) @@ -1472,16 +1479,19 @@ Stream &Stream::ThenPoolBackward( const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<float> &output_data, const DeviceMemory<float> &input_diff_data, - DeviceMemory<float> *output_diff_data) { + DeviceMemory<float> *output_diff_data, + ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), - PARAM(input_diff_data), PARAM(output_diff_data)); + PARAM(input_diff_data), PARAM(output_diff_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, input_data, output_dimensions, output_data, - input_diff_data, output_diff_data)); + input_diff_data, output_diff_data, + workspace_allocator)); } else { SetErrorAndLogNoDnnSupport(); } @@ -1496,16 +1506,19 @@ Stream &Stream::ThenPoolBackward( const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<Eigen::half> &output_data, const DeviceMemory<Eigen::half> &input_diff_data, - DeviceMemory<Eigen::half> *output_diff_data) { + DeviceMemory<Eigen::half> *output_diff_data, + ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), - PARAM(input_diff_data), PARAM(output_diff_data)); + PARAM(input_diff_data), PARAM(output_diff_data), + PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, input_data, output_dimensions, output_data, - input_diff_data, output_diff_data)); + input_diff_data, output_diff_data, + workspace_allocator)); } else { SetErrorAndLogNoDnnSupport(); } @@ -1552,16 +1565,18 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions( const dnn::BatchDescriptor &dimensions, const DeviceMemory<float> &raw_data, const DeviceMemory<float> &normalized_data, const DeviceMemory<float> &normalized_variable_gradient, - DeviceMemory<float> *raw_variable_gradient) { + DeviceMemory<float> *raw_variable_gradient, + ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(normalize_descriptor), PARAM(dimensions), PARAM(raw_data), PARAM(normalized_data), PARAM(normalized_variable_gradient), - PARAM(raw_variable_gradient)); + PARAM(raw_variable_gradient), PARAM(workspace_allocator)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { CheckError(dnn->DoNormalizeBackwardWithDimensions( this, normalize_descriptor, dimensions, raw_data, normalized_data, - normalized_variable_gradient, raw_variable_gradient)); + normalized_variable_gradient, raw_variable_gradient, + workspace_allocator)); } else { SetErrorAndLogNoDnnSupport(); } diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index e8885e1eb6..63d64947c8 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -629,19 +629,22 @@ class Stream { const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<double> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<double> *output_data); + DeviceMemory<double> *output_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<float> *output_data); + DeviceMemory<float> *output_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<Eigen::half> &input_data, const dnn::BatchDescriptor &output_dimensions, - DeviceMemory<Eigen::half> *output_data); + DeviceMemory<Eigen::half> *output_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, @@ -649,7 +652,8 @@ class Stream { const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<double> &output_data, const DeviceMemory<double> &input_diff_data, - DeviceMemory<double> *output_diff_data); + DeviceMemory<double> *output_diff_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, @@ -657,7 +661,8 @@ class Stream { const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<float> &output_data, const DeviceMemory<float> &input_diff_data, - DeviceMemory<float> *output_diff_data); + DeviceMemory<float> *output_diff_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, @@ -665,7 +670,8 @@ class Stream { const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<Eigen::half> &output_data, const DeviceMemory<Eigen::half> &input_diff_data, - DeviceMemory<Eigen::half> *output_diff_data); + DeviceMemory<Eigen::half> *output_diff_data, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenNormalize(const dnn::NormalizeDescriptor &normalize_descriptor, const DeviceMemory<float> &input_data, @@ -684,7 +690,8 @@ class Stream { const DeviceMemory<float> &raw_data, const DeviceMemory<float> &normalized_data, const DeviceMemory<float> &normalized_variable_gradient, - DeviceMemory<float> *raw_variable_gradient); + DeviceMemory<float> *raw_variable_gradient, + ScratchAllocator *workspace_allocator = nullptr); Stream &ThenActivate(dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &dimensions, |