diff options
author | 2017-04-04 16:10:08 -0800 | |
---|---|---|
committer | 2017-04-04 17:24:57 -0700 | |
commit | ccbc8991db3943ef984405881a1c917c530f902f (patch) | |
tree | a7b5c760155bfa4ff95ffc0ebd3823c649668997 /tensorflow/stream_executor | |
parent | 9477900946f923cb43ed76ed215490d01474bfe7 (diff) |
Merge changes from github.
Change: 152200430
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 73 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.h | 16 | ||||
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 38 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.cc | 51 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.h | 14 |
5 files changed, 188 insertions, 4 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 965061053f..6c06a73943 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -3084,6 +3084,41 @@ bool CudnnSupport::DoActivate(Stream* stream, bool CudnnSupport::DoPoolForward( Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + DeviceMemory<double>* output_data) { + mutex_lock lock{dnn_handle_mutex_}; + auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), + AsCUDAStreamValue(stream)); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); + return false; + } + + // Alpha is the scaling factor for input. + double alpha = 1.0; + // Beta is the scaling factor for output. + double beta = 0.0; + + ScopedTensorDescriptor src_desc{parent_, input_dimensions, CUDNN_DATA_DOUBLE}; + ScopedTensorDescriptor dest_desc{parent_, output_dimensions, + CUDNN_DATA_DOUBLE}; + ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; + status = wrap::cudnnPoolingForward( + parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + src_desc.handle(), input_data.opaque(), &beta, dest_desc.handle(), + output_data->opaque()); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(ERROR) << "failed to enqueue forward pooling on stream: " + << ToString(status); + return false; + } + return true; +} + +bool CudnnSupport::DoPoolForward( + Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, DeviceMemory<float>* output_data) { @@ -3153,6 +3188,44 @@ bool CudnnSupport::DoPoolForward( bool CudnnSupport::DoPoolBackward( Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + const DeviceMemory<double>& output_data, + const DeviceMemory<double>& input_diff_data, + DeviceMemory<double>* output_diff_data) { + mutex_lock lock{dnn_handle_mutex_}; + auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), + AsCUDAStreamValue(stream)); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status); + return false; + } + + // Alpha is the scaling factor for input. + double alpha = 1.0; + // Beta is the scaling factor for output. + double beta = 0.0; + + ScopedTensorDescriptor src_desc{parent_, input_dimensions, CUDNN_DATA_DOUBLE}; + ScopedTensorDescriptor dest_desc{parent_, output_dimensions, + CUDNN_DATA_DOUBLE}; + ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions}; + status = wrap::cudnnPoolingBackward( + parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha, + dest_desc.handle(), output_data.opaque(), dest_desc.handle(), + input_diff_data.opaque(), src_desc.handle(), input_data.opaque(), &beta, + src_desc.handle(), output_diff_data->opaque()); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(ERROR) << "failed to enqueue backward pooling on stream: " + << ToString(status); + return false; + } + return true; +} + +bool CudnnSupport::DoPoolBackward( + Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index cfc7e29574..b280b73c70 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -308,6 +308,13 @@ class CudnnSupport : public dnn::DnnSupport { bool DoPoolForward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + DeviceMemory<double>* output_data) override; + + bool DoPoolForward(Stream* stream, + const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, DeviceMemory<float>* output_data) override; @@ -322,6 +329,15 @@ class CudnnSupport : public dnn::DnnSupport { bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + const DeviceMemory<double>& output_data, + const DeviceMemory<double>& input_diff_data, + DeviceMemory<double>* output_diff_data) override; + + bool DoPoolBackward(Stream* stream, + const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index d6b3f51705..c5805064f3 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -1283,19 +1283,47 @@ class DnnSupport { virtual bool DoPoolForward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + DeviceMemory<double>* output_data) { + LOG(FATAL) << "DoPoolForward not implemented for double."; + return false; + } + + virtual bool DoPoolForward(Stream* stream, + const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<Eigen::half>& input_data, const dnn::BatchDescriptor& output_dimensions, - DeviceMemory<Eigen::half>* output_data) = 0; + DeviceMemory<Eigen::half>* output_data) { + LOG(FATAL) << "DoPoolForward not implemented for float16."; + return false; + } // Performs differentiation of the pooling operation. virtual bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<double>& input_data, + const dnn::BatchDescriptor& output_dimensions, + const DeviceMemory<double>& output_data, + const DeviceMemory<double>& input_diff_data, + DeviceMemory<double>* output_diff_data) { + LOG(FATAL) << "DoPoolBackward not implemented."; + return false; + } + + virtual bool DoPoolBackward(Stream* stream, + const dnn::PoolingDescriptor& pooling_dimensions, + const dnn::BatchDescriptor& input_dimensions, const DeviceMemory<float>& input_data, const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<float>& output_data, const DeviceMemory<float>& input_diff_data, - DeviceMemory<float>* output_diff_data) = 0; + DeviceMemory<float>* output_diff_data) { + LOG(FATAL) << "DoPoolBackward not implemented."; + return false; + } virtual bool DoPoolBackward(Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions, @@ -1304,7 +1332,10 @@ class DnnSupport { const dnn::BatchDescriptor& output_dimensions, const DeviceMemory<Eigen::half>& output_data, const DeviceMemory<Eigen::half>& input_diff_data, - DeviceMemory<Eigen::half>* output_diff_data) = 0; + DeviceMemory<Eigen::half>* output_diff_data) { + LOG(FATAL) << "DoPoolBackward not implemented."; + return false; + } // Applies local response normalization to the values from // input_data and writes the result to output_data. See comments on @@ -1884,4 +1915,3 @@ class DnnSupport { } // namespace perftools #endif // TENSORFLOW_STREAM_EXECUTOR_DNN_H_ - diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 76cbf0b1b6..a393b07703 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -966,6 +966,30 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory<float> &input_data, Stream &Stream::ThenPoolForward( const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<double> &input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<double> *output_data) { + VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, + output_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; +} + +Stream &Stream::ThenPoolForward( + const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, DeviceMemory<float> *output_data) { @@ -1008,6 +1032,33 @@ Stream &Stream::ThenPoolForward( Stream &Stream::ThenPoolBackward( const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<double> &input_data, + const dnn::BatchDescriptor &output_dimensions, + const DeviceMemory<double> &output_data, + const DeviceMemory<double> &input_diff_data, + DeviceMemory<double> *output_diff_data) { + VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions), + PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), + PARAM(input_diff_data), PARAM(output_diff_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + input_diff_data, output_diff_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; +} + +Stream &Stream::ThenPoolBackward( + const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<float> &output_data, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index f22fba1d74..5b46b86f54 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -467,6 +467,12 @@ class Stream { Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<double> &input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<double> *output_data); + + Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, DeviceMemory<float> *output_data); @@ -479,6 +485,14 @@ class Stream { Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions, const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<double> &input_data, + const dnn::BatchDescriptor &output_dimensions, + const DeviceMemory<double> &output_data, + const DeviceMemory<double> &input_diff_data, + DeviceMemory<double> *output_diff_data); + + Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions, + const dnn::BatchDescriptor &input_dimensions, const DeviceMemory<float> &input_data, const dnn::BatchDescriptor &output_dimensions, const DeviceMemory<float> &output_data, |