about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/common_runtime/gpu/gpu_util.cc
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-06-27 12:52:51 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-06-27 12:56:12 -0700
commit: dc0afc1c2221e3e155e7a6edc3017f38496acd90 (patch)
tree: 76cfbd3f2fdc29c200910bfde86b4a253af21d4c /tensorflow/core/common_runtime/gpu/gpu_util.cc
parent: 06228fb70858ef50f31cc8cdf909121c80e100b2 (diff)
Add GPUOptions::num_dev_to_dev_copy_streams to allow creation of more than one device-to-device copy stream per GPU device.

This is an experimental feature that will have no effect unless copy operations explicitly request a stream other than 0, which currently does not occur anywhere in a standard build. Eventually it may be of benefit in the presence of multiple bi-directional concurrent data copies.

PiperOrigin-RevId: 202354513
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_util.cc')
-rw-r--r-- tensorflow/core/common_runtime/gpu/gpu_util.cc | 14
1 files changed, 6 insertions, 8 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index d38413d79c..042a1c0fe0 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -185,13 +185,11 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
}
// static
-void GPUUtil::DeviceToDeviceCopy(DeviceContext* send_dev_context,
- DeviceContext* recv_dev_context, Device* src,
- Device* dst,
- AllocatorAttributes src_alloc_attr,
- AllocatorAttributes dst_alloc_attr,
- const Tensor* input, Tensor* output,
- StatusCallback done) {
+void GPUUtil::DeviceToDeviceCopy(
+ DeviceContext* send_dev_context, DeviceContext* recv_dev_context,
+ Device* src, Device* dst, AllocatorAttributes src_alloc_attr,
+ AllocatorAttributes dst_alloc_attr, const Tensor* input, Tensor* output,
+ int dev_to_dev_stream_index, StatusCallback done) {
const DeviceBase::GpuDeviceInfo* dev_info = nullptr;
se::Stream* send_stream = nullptr;
Status s = PrepareCopy(src, send_dev_context, *input, output, &dev_info,
@@ -202,7 +200,7 @@ void GPUUtil::DeviceToDeviceCopy(DeviceContext* send_dev_context,
}
auto send_device_to_device_stream =
static_cast<const GPUDeviceContext*>(send_dev_context)
- ->device_to_device_stream();
+ ->device_to_device_stream(dev_to_dev_stream_index);
if (send_device_to_device_stream == nullptr) {
done(errors::Internal("No send gpu copy-out-stream is available."));
return;