diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-06-26 14:00:17 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-06-26 14:04:35 -0700 |
commit | 1fa73c53ab95693f070ce70e6be0c644d83c163a (patch) | |
tree | ffbedf825daf1f3453c695a433c8a9cdf93f6019 /tensorflow/contrib/verbs | |
parent | b13e96e21c1229a905a623111dd89d2bd0cba53b (diff) |
Automated g4 rollback of changelist 160182040
PiperOrigin-RevId: 160190881
Diffstat (limited to 'tensorflow/contrib/verbs')
-rw-r--r-- | tensorflow/contrib/verbs/rdma.cc | 55 | ||||
-rw-r--r-- | tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 41 | ||||
-rw-r--r-- | tensorflow/contrib/verbs/verbs_util.cc | 34 | ||||
-rw-r--r-- | tensorflow/contrib/verbs/verbs_util.h | 10 |
4 files changed, 16 insertions, 124 deletions
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 6f3a616fe8..bc687be0ab 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" -#include "tensorflow/core/common_runtime/gpu/process_state.h" #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -684,6 +683,7 @@ void RdmaTensorBuffer::SendNextItem() { << " error message: " << status.error_message(); size_t buffer_size = RdmaMessage::kMessageTotalBytes; size_t tensor_bytes = 0; + TensorProto proto; // Figures out which device the tensor is hosted on. Device* src_dev = nullptr; Status s = channel_->adapter_->worker_env_->device_mgr->LookupDevice( @@ -703,47 +703,21 @@ void RdmaTensorBuffer::SendNextItem() { CHECK(s.ok()) << "dst device not found"; AllocatorAttributes dst_alloc_attr; dst_alloc_attr.set_on_host(true); - - bool can_memcpy = DataTypeCanUseMemcpy(in.dtype()); // string tensor needs to be serialized - Tensor copy; - StringPiece copy_buf; - TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { CHECK(send_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); - - if (can_memcpy) { - AllocatorAttributes host_alloc_attrs; - host_alloc_attrs.set_gpu_compatible(true); - host_alloc_attrs.set_on_host(true); - Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); - copy = Tensor(alloc, in.dtype(), in.shape()); - s = VerbsUtil::CopyGPUTensorToCPUSync( - src_dev, send_args.device_context, &in, ©); - CHECK(s.ok()) << "copy tensor from gpu sync"; - copy_buf = copy.tensor_data(); - } else { - // "val" is on a GPU. Uses GPUUtil to fill the proto. - s = VerbsUtil::SetProtoFromGPUSync( - in, src_dev, send_args.device_context, &proto, is_dead); - CHECK(s.ok()) << "set proto from gpu sync"; - } + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + // "val" is on a GPU. Uses GPUUtil to fill the proto. + s = VerbsUtil::SetProtoFromGPUSync( + in, src_dev, send_args.device_context, &proto, is_dead); + CHECK(s.ok()) << "set proto from gpu sync"; } else { // tensor is in CPU memory. - if (can_memcpy) { - copy_buf = in.tensor_data(); - } else { - in.AsProtoTensorContent(&proto); - } - } - if (can_memcpy) { - tensor_bytes = in.TotalBytes(); - } else { - tensor_bytes = proto.ByteSize(); + in.AsProtoTensorContent(&proto); } + tensor_bytes = proto.ByteSize(); // maybe some margin for string tensor? buffer_size += tensor_bytes; // prepare message @@ -797,16 +771,7 @@ void RdmaTensorBuffer::SendNextItem() { static_cast<void*>(static_cast<char*>(buffer_) + RdmaMessage::kTensorBufferStartIndex); CHECK(tensor_bytes + RdmaMessage::kTensorBufferStartIndex <= size_); - if (can_memcpy) { - CHECK(copy_buf.size() == tensor_bytes) - << "unexpected tensor size: " - << copy_buf.size() - << " != " - << tensor_bytes; - memcpy(output, copy_buf.data(), tensor_bytes); - } else { - proto.SerializeToArray(output, tensor_bytes); - } + proto.SerializeToArray(output, tensor_bytes); } else { buffer_size = RdmaMessage::kMessageTotalBytes; } diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index 9ea696589a..5871400f26 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/common_runtime/gpu/process_state.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -100,40 +99,12 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( if (!rm.is_dead_) { void* input = static_cast<char*>(rb->buffer_) + RdmaMessage::kTensorBufferStartIndex; - bool can_memcpy = DataTypeCanUseMemcpy(rm.data_type_); - if (can_memcpy) { - if (dst_dev->tensorflow_gpu_device_info() && - (!recv_args.alloc_attrs.on_host())) { - CHECK(recv_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); - Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); - Tensor copy(alloc, rm.data_type_, rm.tensor_shape_); - memcpy(DMAHelper::base(©), input, rm.tensor_bytes_); - - Allocator* dst_alloc = dst_dev->GetAllocator(recv_args.alloc_attrs); - Tensor gpu_copy(dst_alloc, rm.data_type_, rm.tensor_shape_); - s = VerbsUtil::CopyCPUTensorToGPUSync(©, recv_args.device_context, - dst_dev, &gpu_copy); - CHECK(s.ok()) << "copy tensor to gpu sync"; - val = std::move(gpu_copy); - } else { - AllocatorAttributes host_alloc_attrs; - host_alloc_attrs.set_gpu_compatible(true); - host_alloc_attrs.set_on_host(true); - Allocator* alloc = dst_dev->GetAllocator(host_alloc_attrs); - Tensor copy(alloc, rm.data_type_, rm.tensor_shape_); - memcpy(DMAHelper::base(©), input, rm.tensor_bytes_); - val = std::move(copy); - } - } else { - TensorProto proto; - CHECK(rm.tensor_bytes_ + RdmaMessage::kTensorBufferStartIndex <= - rb->size_); - CHECK(ParseProtoUnlimited(&proto, input, rm.tensor_bytes_)) - << "fail to parse proto from array"; - s = dst_dev->MakeTensorFromProto(proto, recv_args.alloc_attrs, &val); - } + TensorProto proto; + CHECK(rm.tensor_bytes_ + RdmaMessage::kTensorBufferStartIndex <= + rb->size_); + CHECK(ParseProtoUnlimited(&proto, input, rm.tensor_bytes_)) + << "fail to parse proto from array"; + s = dst_dev->MakeTensorFromProto(proto, recv_args.alloc_attrs, &val); } rc->RemoveRecvCallback(key_with_step_id); diff --git a/tensorflow/contrib/verbs/verbs_util.cc b/tensorflow/contrib/verbs/verbs_util.cc index 76e44d34a9..c3350f7958 100644 --- a/tensorflow/contrib/verbs/verbs_util.cc +++ b/tensorflow/contrib/verbs/verbs_util.cc @@ -21,40 +21,6 @@ limitations under the License. namespace tensorflow { // static sync wrapper: -Status VerbsUtil::CopyGPUTensorToCPUSync(Device* gpu_device, - const DeviceContext* device_context, - const Tensor* gpu_tensor, - Tensor* cpu_tensor) { - Notification n; - Status status; - GPUUtil::CopyGPUTensorToCPU(gpu_device, device_context, - gpu_tensor, cpu_tensor, - [&n, &status](const Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - return status; -} - -// static sync wrapper: -Status VerbsUtil::CopyCPUTensorToGPUSync(const Tensor* cpu_tensor, - const DeviceContext* device_context, - Device* gpu_device, - Tensor* gpu_tensor) { - Notification n; - Status status; - GPUUtil::CopyCPUTensorToGPU(cpu_tensor, device_context, - gpu_device, gpu_tensor, - [&n, &status](const Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - return status; -} - -// static sync wrapper: Status VerbsUtil::SetProtoFromGPUSync(const Tensor& tensor, Device* dev, const DeviceContext* device_context, TensorProto* proto, bool is_dead) { diff --git a/tensorflow/contrib/verbs/verbs_util.h b/tensorflow/contrib/verbs/verbs_util.h index d9da396228..cbc01adae4 100644 --- a/tensorflow/contrib/verbs/verbs_util.h +++ b/tensorflow/contrib/verbs/verbs_util.h @@ -28,16 +28,6 @@ class TensorProto; class VerbsUtil { public: - // synchronous wrapper of CopyGPUTensorToCPU - static Status CopyGPUTensorToCPUSync(Device* gpu_device, - const DeviceContext* device_context, - const Tensor* gpu_tensor, - Tensor* cpu_tensor); - // synchronous wrapper of CopyCPUTensorToGPU - static Status CopyCPUTensorToGPUSync(const Tensor* cpu_tensor, - const DeviceContext* device_context, - Device* gpu_device, - Tensor* gpu_tensor); // synchronous wrapper of SetProtoFromGPU static Status SetProtoFromGPUSync(const Tensor& tensor, Device* dev, const DeviceContext* device_context, |