aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-04-04 16:10:08 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-04-04 17:24:57 -0700
commitccbc8991db3943ef984405881a1c917c530f902f (patch)
treea7b5c760155bfa4ff95ffc0ebd3823c649668997 /tensorflow/stream_executor
parent9477900946f923cb43ed76ed215490d01474bfe7 (diff)
Merge changes from github.
Change: 152200430
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc73
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.h16
-rw-r--r--tensorflow/stream_executor/dnn.h38
-rw-r--r--tensorflow/stream_executor/stream.cc51
-rw-r--r--tensorflow/stream_executor/stream.h14
5 files changed, 188 insertions, 4 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 965061053f..6c06a73943 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3084,6 +3084,41 @@ bool CudnnSupport::DoActivate(Stream* stream,
// Enqueues a double-precision cudnn pooling-forward operation on `stream`.
// Returns false (after logging) if cudnn rejects the stream binding or the
// pooling launch; returns true once the work is queued.
bool CudnnSupport::DoPoolForward(
    Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
    const dnn::BatchDescriptor& input_dimensions,
    const DeviceMemory<double>& input_data,
    const dnn::BatchDescriptor& output_dimensions,
    DeviceMemory<double>* output_data) {
  mutex_lock lock{dnn_handle_mutex_};
  // Point the shared cudnn handle at the caller's stream before launching.
  auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_),
                                     AsCUDAStreamValue(stream));
  if (status != CUDNN_STATUS_SUCCESS) {
    LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status);
    return false;
  }

  ScopedTensorDescriptor input_desc{parent_, input_dimensions,
                                    CUDNN_DATA_DOUBLE};
  ScopedTensorDescriptor output_desc{parent_, output_dimensions,
                                     CUDNN_DATA_DOUBLE};
  ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions};

  // Scaling factors: take the pooled result unscaled (alpha = 1) and do not
  // blend with any prior contents of the output buffer (beta = 0).
  double alpha = 1.0;
  double beta = 0.0;

  status = wrap::cudnnPoolingForward(
      parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha,
      input_desc.handle(), input_data.opaque(), &beta, output_desc.handle(),
      output_data->opaque());
  if (status != CUDNN_STATUS_SUCCESS) {
    LOG(ERROR) << "failed to enqueue forward pooling on stream: "
               << ToString(status);
    return false;
  }
  return true;
}
+
+bool CudnnSupport::DoPoolForward(
+ Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) {
@@ -3153,6 +3188,44 @@ bool CudnnSupport::DoPoolForward(
// Enqueues a double-precision cudnn pooling-backward (gradient) operation on
// `stream`. `input_data`/`output_data` are the forward pass's activations,
// `input_diff_data` the incoming gradient, and `output_diff_data` receives
// the gradient w.r.t. the pooling input. Returns false (after logging) on
// any cudnn failure.
bool CudnnSupport::DoPoolBackward(
    Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
    const dnn::BatchDescriptor& input_dimensions,
    const DeviceMemory<double>& input_data,
    const dnn::BatchDescriptor& output_dimensions,
    const DeviceMemory<double>& output_data,
    const DeviceMemory<double>& input_diff_data,
    DeviceMemory<double>* output_diff_data) {
  mutex_lock lock{dnn_handle_mutex_};
  // Point the shared cudnn handle at the caller's stream before launching.
  auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_),
                                     AsCUDAStreamValue(stream));
  if (status != CUDNN_STATUS_SUCCESS) {
    LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status);
    return false;
  }

  ScopedTensorDescriptor input_desc{parent_, input_dimensions,
                                    CUDNN_DATA_DOUBLE};
  ScopedTensorDescriptor output_desc{parent_, output_dimensions,
                                     CUDNN_DATA_DOUBLE};
  ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions};

  // Scaling factors: take the computed gradient unscaled (alpha = 1) and do
  // not blend with any prior contents of the output buffer (beta = 0).
  double alpha = 1.0;
  double beta = 0.0;

  status = wrap::cudnnPoolingBackward(
      parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha,
      output_desc.handle(), output_data.opaque(), output_desc.handle(),
      input_diff_data.opaque(), input_desc.handle(), input_data.opaque(), &beta,
      input_desc.handle(), output_diff_data->opaque());
  if (status != CUDNN_STATUS_SUCCESS) {
    LOG(ERROR) << "failed to enqueue backward pooling on stream: "
               << ToString(status);
    return false;
  }
  return true;
}
+
+bool CudnnSupport::DoPoolBackward(
+ Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index cfc7e29574..b280b73c70 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -308,6 +308,13 @@ class CudnnSupport : public dnn::DnnSupport {
bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<double>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<double>* output_data) override;
+
+ bool DoPoolForward(Stream* stream,
+ const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) override;
@@ -322,6 +329,15 @@ class CudnnSupport : public dnn::DnnSupport {
bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<double>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ const DeviceMemory<double>& output_data,
+ const DeviceMemory<double>& input_diff_data,
+ DeviceMemory<double>* output_diff_data) override;
+
+ bool DoPoolBackward(Stream* stream,
+ const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index d6b3f51705..c5805064f3 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1283,19 +1283,47 @@ class DnnSupport {
virtual bool DoPoolForward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<double>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<double>* output_data) {
+ LOG(FATAL) << "DoPoolForward not implemented for double.";
+ return false;
+ }
+
+ virtual bool DoPoolForward(Stream* stream,
+ const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& output_dimensions,
- DeviceMemory<Eigen::half>* output_data) = 0;
+ DeviceMemory<Eigen::half>* output_data) {
+ LOG(FATAL) << "DoPoolForward not implemented for float16.";
+ return false;
+ }
// Performs differentiation of the pooling operation.
virtual bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<double>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ const DeviceMemory<double>& output_data,
+ const DeviceMemory<double>& input_diff_data,
+ DeviceMemory<double>* output_diff_data) {
+ LOG(FATAL) << "DoPoolBackward not implemented.";
+ return false;
+ }
+
+ virtual bool DoPoolBackward(Stream* stream,
+ const dnn::PoolingDescriptor& pooling_dimensions,
+ const dnn::BatchDescriptor& input_dimensions,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<float>& output_data,
const DeviceMemory<float>& input_diff_data,
- DeviceMemory<float>* output_diff_data) = 0;
+ DeviceMemory<float>* output_diff_data) {
+ LOG(FATAL) << "DoPoolBackward not implemented.";
+ return false;
+ }
virtual bool DoPoolBackward(Stream* stream,
const dnn::PoolingDescriptor& pooling_dimensions,
@@ -1304,7 +1332,10 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
const DeviceMemory<Eigen::half>& output_data,
const DeviceMemory<Eigen::half>& input_diff_data,
- DeviceMemory<Eigen::half>* output_diff_data) = 0;
+ DeviceMemory<Eigen::half>* output_diff_data) {
+ LOG(FATAL) << "DoPoolBackward not implemented.";
+ return false;
+ }
// Applies local response normalization to the values from
// input_data and writes the result to output_data. See comments on
@@ -1884,4 +1915,3 @@ class DnnSupport {
} // namespace perftools
#endif // TENSORFLOW_STREAM_EXECUTOR_DNN_H_
-
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 76cbf0b1b6..a393b07703 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -966,6 +966,30 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory<float> &input_data,
// Fluent wrapper that forwards a double-precision pooling-forward request to
// the platform's DNN support. Records an error on the stream (via CheckError
// / SetError) instead of throwing, and always returns *this for chaining.
Stream &Stream::ThenPoolForward(
    const dnn::PoolingDescriptor &pooling_dimensions,
    const dnn::BatchDescriptor &input_dimensions,
    const DeviceMemory<double> &input_data,
    const dnn::BatchDescriptor &output_dimensions,
    DeviceMemory<double> *output_data) {
  VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
            PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));

  // A stream already in an error state silently drops further work.
  if (!ok()) {
    return *this;
  }
  dnn::DnnSupport *dnn = parent_->AsDnn();
  if (dnn == nullptr) {
    SetError();
    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
                    "without DNN support";
    return *this;
  }
  CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
                                input_data, output_dimensions, output_data));
  return *this;
}
+
+Stream &Stream::ThenPoolForward(
+ const dnn::PoolingDescriptor &pooling_dimensions,
+ const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
DeviceMemory<float> *output_data) {
@@ -1008,6 +1032,33 @@ Stream &Stream::ThenPoolForward(
// Fluent wrapper that forwards a double-precision pooling-backward (gradient)
// request to the platform's DNN support. Records an error on the stream (via
// CheckError / SetError) instead of throwing, and always returns *this for
// chaining.
Stream &Stream::ThenPoolBackward(
    const dnn::PoolingDescriptor &pooling_dimensions,
    const dnn::BatchDescriptor &input_dimensions,
    const DeviceMemory<double> &input_data,
    const dnn::BatchDescriptor &output_dimensions,
    const DeviceMemory<double> &output_data,
    const DeviceMemory<double> &input_diff_data,
    DeviceMemory<double> *output_diff_data) {
  VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
            PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
            PARAM(input_diff_data), PARAM(output_diff_data));

  // A stream already in an error state silently drops further work.
  if (!ok()) {
    return *this;
  }
  dnn::DnnSupport *dnn = parent_->AsDnn();
  if (dnn == nullptr) {
    SetError();
    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
                    "without DNN support";
    return *this;
  }
  CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
                                 input_data, output_dimensions, output_data,
                                 input_diff_data, output_diff_data));
  return *this;
}
+
+Stream &Stream::ThenPoolBackward(
+ const dnn::PoolingDescriptor &pooling_dimensions,
+ const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index f22fba1d74..5b46b86f54 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -467,6 +467,12 @@ class Stream {
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<double> &input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ DeviceMemory<double> *output_data);
+
+ Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
+ const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
DeviceMemory<float> *output_data);
@@ -479,6 +485,14 @@ class Stream {
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<double> &input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ const DeviceMemory<double> &output_data,
+ const DeviceMemory<double> &input_diff_data,
+ DeviceMemory<double> *output_diff_data);
+
+ Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
+ const dnn::BatchDescriptor &input_dimensions,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &output_dimensions,
const DeviceMemory<float> &output_data,