about summary refs log tree commit diff homepage
path: root/tensorflow/contrib/verbs
diff options
context:
space:
mode:
author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-06-26 14:00:17 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-06-26 14:04:35 -0700
commit    1fa73c53ab95693f070ce70e6be0c644d83c163a (patch)
tree      ffbedf825daf1f3453c695a433c8a9cdf93f6019 /tensorflow/contrib/verbs
parent    b13e96e21c1229a905a623111dd89d2bd0cba53b (diff)
Automated g4 rollback of changelist 160182040
PiperOrigin-RevId: 160190881
Diffstat (limited to 'tensorflow/contrib/verbs')
-rw-r--r--  tensorflow/contrib/verbs/rdma.cc                |  55
-rw-r--r--  tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc |  41
-rw-r--r--  tensorflow/contrib/verbs/verbs_util.cc          |  34
-rw-r--r--  tensorflow/contrib/verbs/verbs_util.h           |  10
4 files changed, 16 insertions(+), 124 deletions(-)
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 6f3a616fe8..bc687be0ab 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -21,7 +21,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
#include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h"
#include "tensorflow/core/distributed_runtime/session_mgr.h"
#include "tensorflow/core/framework/rendezvous.h"
@@ -684,6 +683,7 @@ void RdmaTensorBuffer::SendNextItem() {
<< " error message: " << status.error_message();
size_t buffer_size = RdmaMessage::kMessageTotalBytes;
size_t tensor_bytes = 0;
+ TensorProto proto;
// Figures out which device the tensor is hosted on.
Device* src_dev = nullptr;
Status s = channel_->adapter_->worker_env_->device_mgr->LookupDevice(
@@ -703,47 +703,21 @@ void RdmaTensorBuffer::SendNextItem() {
CHECK(s.ok()) << "dst device not found";
AllocatorAttributes dst_alloc_attr;
dst_alloc_attr.set_on_host(true);
-
- bool can_memcpy = DataTypeCanUseMemcpy(in.dtype());
// string tensor needs to be serialized
- Tensor copy;
- StringPiece copy_buf;
- TensorProto proto;
if (src_dev->tensorflow_gpu_device_info() &&
(!send_args.alloc_attrs.on_host())) {
CHECK(send_args.device_context)
- << "send dev name: " << src_dev->name()
- << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
-
- if (can_memcpy) {
- AllocatorAttributes host_alloc_attrs;
- host_alloc_attrs.set_gpu_compatible(true);
- host_alloc_attrs.set_on_host(true);
- Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
- copy = Tensor(alloc, in.dtype(), in.shape());
- s = VerbsUtil::CopyGPUTensorToCPUSync(
- src_dev, send_args.device_context, &in, &copy);
- CHECK(s.ok()) << "copy tensor from gpu sync";
- copy_buf = copy.tensor_data();
- } else {
- // "val" is on a GPU. Uses GPUUtil to fill the proto.
- s = VerbsUtil::SetProtoFromGPUSync(
- in, src_dev, send_args.device_context, &proto, is_dead);
- CHECK(s.ok()) << "set proto from gpu sync";
- }
+ << "send dev name: " << src_dev->name()
+ << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+ // "val" is on a GPU. Uses GPUUtil to fill the proto.
+ s = VerbsUtil::SetProtoFromGPUSync(
+ in, src_dev, send_args.device_context, &proto, is_dead);
+ CHECK(s.ok()) << "set proto from gpu sync";
} else {
// tensor is in CPU memory.
- if (can_memcpy) {
- copy_buf = in.tensor_data();
- } else {
- in.AsProtoTensorContent(&proto);
- }
- }
- if (can_memcpy) {
- tensor_bytes = in.TotalBytes();
- } else {
- tensor_bytes = proto.ByteSize();
+ in.AsProtoTensorContent(&proto);
}
+ tensor_bytes = proto.ByteSize();
// maybe some margin for string tensor?
buffer_size += tensor_bytes;
// prepare message
@@ -797,16 +771,7 @@ void RdmaTensorBuffer::SendNextItem() {
static_cast<void*>(static_cast<char*>(buffer_) +
RdmaMessage::kTensorBufferStartIndex);
CHECK(tensor_bytes + RdmaMessage::kTensorBufferStartIndex <= size_);
- if (can_memcpy) {
- CHECK(copy_buf.size() == tensor_bytes)
- << "unexpected tensor size: "
- << copy_buf.size()
- << " != "
- << tensor_bytes;
- memcpy(output, copy_buf.data(), tensor_bytes);
- } else {
- proto.SerializeToArray(output, tensor_bytes);
- }
+ proto.SerializeToArray(output, tensor_bytes);
} else {
buffer_size = RdmaMessage::kMessageTotalBytes;
}
diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
index 9ea696589a..5871400f26 100644
--- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
@@ -21,7 +21,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
@@ -100,40 +99,12 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
if (!rm.is_dead_) {
void* input = static_cast<char*>(rb->buffer_) +
RdmaMessage::kTensorBufferStartIndex;
- bool can_memcpy = DataTypeCanUseMemcpy(rm.data_type_);
- if (can_memcpy) {
- if (dst_dev->tensorflow_gpu_device_info() &&
- (!recv_args.alloc_attrs.on_host())) {
- CHECK(recv_args.device_context)
- << "send dev name: " << src_dev->name()
- << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
- Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
- Tensor copy(alloc, rm.data_type_, rm.tensor_shape_);
- memcpy(DMAHelper::base(&copy), input, rm.tensor_bytes_);
-
- Allocator* dst_alloc = dst_dev->GetAllocator(recv_args.alloc_attrs);
- Tensor gpu_copy(dst_alloc, rm.data_type_, rm.tensor_shape_);
- s = VerbsUtil::CopyCPUTensorToGPUSync(&copy, recv_args.device_context,
- dst_dev, &gpu_copy);
- CHECK(s.ok()) << "copy tensor to gpu sync";
- val = std::move(gpu_copy);
- } else {
- AllocatorAttributes host_alloc_attrs;
- host_alloc_attrs.set_gpu_compatible(true);
- host_alloc_attrs.set_on_host(true);
- Allocator* alloc = dst_dev->GetAllocator(host_alloc_attrs);
- Tensor copy(alloc, rm.data_type_, rm.tensor_shape_);
- memcpy(DMAHelper::base(&copy), input, rm.tensor_bytes_);
- val = std::move(copy);
- }
- } else {
- TensorProto proto;
- CHECK(rm.tensor_bytes_ + RdmaMessage::kTensorBufferStartIndex <=
- rb->size_);
- CHECK(ParseProtoUnlimited(&proto, input, rm.tensor_bytes_))
- << "fail to parse proto from array";
- s = dst_dev->MakeTensorFromProto(proto, recv_args.alloc_attrs, &val);
- }
+ TensorProto proto;
+ CHECK(rm.tensor_bytes_ + RdmaMessage::kTensorBufferStartIndex <=
+ rb->size_);
+ CHECK(ParseProtoUnlimited(&proto, input, rm.tensor_bytes_))
+ << "fail to parse proto from array";
+ s = dst_dev->MakeTensorFromProto(proto, recv_args.alloc_attrs, &val);
}
rc->RemoveRecvCallback(key_with_step_id);
diff --git a/tensorflow/contrib/verbs/verbs_util.cc b/tensorflow/contrib/verbs/verbs_util.cc
index 76e44d34a9..c3350f7958 100644
--- a/tensorflow/contrib/verbs/verbs_util.cc
+++ b/tensorflow/contrib/verbs/verbs_util.cc
@@ -21,40 +21,6 @@ limitations under the License.
namespace tensorflow {
// static sync wrapper:
-Status VerbsUtil::CopyGPUTensorToCPUSync(Device* gpu_device,
- const DeviceContext* device_context,
- const Tensor* gpu_tensor,
- Tensor* cpu_tensor) {
- Notification n;
- Status status;
- GPUUtil::CopyGPUTensorToCPU(gpu_device, device_context,
- gpu_tensor, cpu_tensor,
- [&n, &status](const Status& s) {
- status = s;
- n.Notify();
- });
- n.WaitForNotification();
- return status;
-}
-
-// static sync wrapper:
-Status VerbsUtil::CopyCPUTensorToGPUSync(const Tensor* cpu_tensor,
- const DeviceContext* device_context,
- Device* gpu_device,
- Tensor* gpu_tensor) {
- Notification n;
- Status status;
- GPUUtil::CopyCPUTensorToGPU(cpu_tensor, device_context,
- gpu_device, gpu_tensor,
- [&n, &status](const Status& s) {
- status = s;
- n.Notify();
- });
- n.WaitForNotification();
- return status;
-}
-
-// static sync wrapper:
Status VerbsUtil::SetProtoFromGPUSync(const Tensor& tensor, Device* dev,
const DeviceContext* device_context,
TensorProto* proto, bool is_dead) {
diff --git a/tensorflow/contrib/verbs/verbs_util.h b/tensorflow/contrib/verbs/verbs_util.h
index d9da396228..cbc01adae4 100644
--- a/tensorflow/contrib/verbs/verbs_util.h
+++ b/tensorflow/contrib/verbs/verbs_util.h
@@ -28,16 +28,6 @@ class TensorProto;
class VerbsUtil {
public:
- // synchronous wrapper of CopyGPUTensorToCPU
- static Status CopyGPUTensorToCPUSync(Device* gpu_device,
- const DeviceContext* device_context,
- const Tensor* gpu_tensor,
- Tensor* cpu_tensor);
- // synchronous wrapper of CopyCPUTensorToGPU
- static Status CopyCPUTensorToGPUSync(const Tensor* cpu_tensor,
- const DeviceContext* device_context,
- Device* gpu_device,
- Tensor* gpu_tensor);
// synchronous wrapper of SetProtoFromGPU
static Status SetProtoFromGPUSync(const Tensor& tensor, Device* dev,
const DeviceContext* device_context,