Disentangle the GPU code from the CPU code. This means a few things:

* The "core_cpu_internal" build target no longer includes files from the common_runtime/gpu/ directory.
* tensorflow/core internal targets can instead get access to those headers via the "gpu_runtime" target.
* The class "CopyTensor" is introduced. It lives in common_runtime/ but supports registration of copy functions so the "gpu_runtime" target can add a GPU->GPU copy ability if it is linked in. This registration should make it easier to add more device types in the future.
* The "core_cpu" and "core_cpu_internal" build targets no longer reference GPUUtil::CopyViaDMA; rendezvous_mgr uses CopyTensor instead. Also, the "copy_tensor" build target was not needed.

Change: 112821119
author: Josh Levenberg <josh11b@tensorflow.org> 2016-01-22 14:45:34 -0800
committer: Vijay Vasudevan <vrv@google.com> 2016-01-22 17:04:38 -0800
commit: 4ba51b33357d68f882a920fb4f87bfe67bb034a0 (patch)
tree: 1d9741962b25bde407e90103c506ca9ab1ec2042 /tensorflow/core/common_runtime/rendezvous_mgr.cc
parent: fde1dc4a489471bb9064f7a0013b9c89f46febf8 (diff)
1 files changed, 5 insertions, 36 deletions
diff --git a/tensorflow/core/common_runtime/rendezvous_mgr.cc b/tensorflow/core/common_runtime/rendezvous_mgr.cc
index 95c11c075d..2f311f9db8 100644
--- a/tensorflow/core/common_runtime/rendezvous_mgr.cc
+++ b/tensorflow/core/common_runtime/rendezvous_mgr.cc
@@ -17,11 +17,9 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include "tensorflow/core/common_runtime/copy_tensor.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
-#if !defined(__ANDROID__) && (defined(PLATFORM_GOOGLE) || GOOGLE_CUDA)
-#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#endif
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
@@ -33,35 +31,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-namespace {
-
-void CopyTensorBetweenDevices(const string& id, DeviceContext* send_dev_context,
-                              DeviceContext* recv_dev_context, Device* src,
-                              Device* dst,
-                              const AllocatorAttributes src_alloc_attr,
-                              const AllocatorAttributes dst_alloc_attr,
-                              const Tensor* input, Tensor* output,
-                              std::function<void(const Status&)> done) {
-  if (src->attributes().device_type() != dst->attributes().device_type()) {
-    done(errors::Unimplemented(
-        "Copy between device types not yet implemented: src=", src->name(),
-        " dst=", dst->name()));
-  } else if (src->attributes().device_type() != "CPU") {
-    done(errors::Unimplemented(
-        "Copy between non-CPU devices not yet implemented"));
-  }
-  *output = *input;
-  done(Status::OK());
-}
-
-#if !defined(__ANDROID__) && (defined(PLATFORM_GOOGLE) || GOOGLE_CUDA)
-constexpr auto CopyTensorBetweenDevicesFunc = &GPUUtil::CopyViaDMA;
-#else
-constexpr auto CopyTensorBetweenDevicesFunc = &CopyTensorBetweenDevices;
-#endif
-
-}  // end namespace
-
 IntraProcessRendezvous::IntraProcessRendezvous(const DeviceMgr* device_mgr)
     : device_mgr_(device_mgr), local_(NewLocalRendezvous()) {}
 
@@ -136,10 +105,10 @@ void IntraProcessRendezvous::SameWorkerRecvDone(
   Tensor copy(out_allocator, in.dtype(), in.shape());
   *out = copy;
 
-  CopyTensorBetweenDevicesFunc(parsed.edge_name, send_args.device_context,
-                               recv_args.device_context, src_device, dst_device,
-                               send_args.alloc_attrs, recv_args.alloc_attrs,
-                               &in, out, done);
+  CopyTensor::ViaDMA(parsed.edge_name, send_args.device_context,
+                     recv_args.device_context, src_device, dst_device,
+                     send_args.alloc_attrs, recv_args.alloc_attrs, &in, out,
+                     done);
 }
 
 void IntraProcessRendezvous::RecvAsync(const string& key,
author	Josh Levenberg <josh11b@tensorflow.org>	2016-01-22 14:45:34 -0800
committer	Vijay Vasudevan <vrv@google.com>	2016-01-22 17:04:38 -0800
commit	4ba51b33357d68f882a920fb4f87bfe67bb034a0 (patch)
tree	1d9741962b25bde407e90103c506ca9ab1ec2042 /tensorflow/core/common_runtime/rendezvous_mgr.cc
parent	fde1dc4a489471bb9064f7a0013b9c89f46febf8 (diff)