diff options
author | 2018-06-27 12:52:51 -0700 | |
---|---|---|
committer | 2018-06-27 12:56:12 -0700 | |
commit | dc0afc1c2221e3e155e7a6edc3017f38496acd90 (patch) | |
tree | 76cfbd3f2fdc29c200910bfde86b4a253af21d4c /tensorflow/core/common_runtime/gpu/gpu_util.cc | |
parent | 06228fb70858ef50f31cc8cdf909121c80e100b2 (diff) |
Add GPUOptions::num_dev_to_dev_copy_streams to allow creation of
more than one device-to-device copy stream per GPU device.
This is an experimental feature that will have no effect unless
copy operations explicitly request a stream other than 0, which
currently does not occur anywhere in a standard build.
Eventually it may be of benefit in the presence of multiple
bi-directional concurrent data copies.
PiperOrigin-RevId: 202354513
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_util.cc')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_util.cc | 14 |
1 file changed, 6 insertions, 8 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index d38413d79c..042a1c0fe0 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -185,13 +185,11 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
 }
 
 // static
-void GPUUtil::DeviceToDeviceCopy(DeviceContext* send_dev_context,
-                                 DeviceContext* recv_dev_context, Device* src,
-                                 Device* dst,
-                                 AllocatorAttributes src_alloc_attr,
-                                 AllocatorAttributes dst_alloc_attr,
-                                 const Tensor* input, Tensor* output,
-                                 StatusCallback done) {
+void GPUUtil::DeviceToDeviceCopy(
+    DeviceContext* send_dev_context, DeviceContext* recv_dev_context,
+    Device* src, Device* dst, AllocatorAttributes src_alloc_attr,
+    AllocatorAttributes dst_alloc_attr, const Tensor* input, Tensor* output,
+    int dev_to_dev_stream_index, StatusCallback done) {
   const DeviceBase::GpuDeviceInfo* dev_info = nullptr;
   se::Stream* send_stream = nullptr;
   Status s = PrepareCopy(src, send_dev_context, *input, output, &dev_info,
@@ -202,7 +200,7 @@ void GPUUtil::DeviceToDeviceCopy(DeviceContext* send_dev_context,
   }
   auto send_device_to_device_stream =
       static_cast<const GPUDeviceContext*>(send_dev_context)
-          ->device_to_device_stream();
+          ->device_to_device_stream(dev_to_dev_stream_index);
   if (send_device_to_device_stream == nullptr) {
     done(errors::Internal("No send gpu copy-out-stream is available."));
     return;