Refactor ProcessState in support of NUMA.

ProcessState is a singleton that anchors per-process resources. Up until now that meant only GPU-related memory allocators since CPU allocation was usually done directly from Allocator::cpu_allocator. Accordingly process_state.h was in common_runtime/gpu and ProcesState was only used in GPU builds. With the upcoming introduction of NUMA node specific CPU allocators it will be important that most of the TF runtime switch to requesting the proper NUMA-specific CPU allocator. These allocators will be owned by and obtained from the ProcessState singleton which will exist in all builds. The GPU-specific functions are moved to a new GPUProcessState, also a singleton. PoolAllocator is also migrated out of common_rumntime/gpu into common_runtime. PiperOrigin-RevId: 203002666
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-07-02 13:26:45 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-07-02 13:29:45 -0700
commit: c1560f0b86cbe1059b091942f18100d993018c5f (patch)
tree: b83e259384ee22b27accf3968e5b2b232b8c4135 /tensorflow/contrib/verbs
parent: 8b53bfe12f2cc45cbccf0d1ffcf6150e89fdc97f (diff)
2 files changed, 8 insertions, 6 deletions
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 86350a08e5..f7c979e863 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #if GOOGLE_CUDA
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
 #endif
 #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h"
@@ -1084,7 +1084,7 @@ void RdmaTensorResponse::RecvHandler(Rendezvous::ParsedKey parsed,
       // The tensor must be copied from GPU to CPU, because either:
       // 1. The tensor is located on a non GDR compatible GPU.
       // 2. The tensor's meta-data has changed.
-      Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
+      Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
       copy = Tensor(alloc, in.dtype(), in.shape());
       CountCopies(rm_.name_, (void*)DMAHelper::base(&in),
                   (void*)DMAHelper::base(&copy), in.TotalBytes(), true);
@@ -1541,7 +1541,7 @@ bool RdmaTensorRequest::AllocateTensors() {
     if (mr_ == nullptr) {
       // Can't RDMA directly to result. Use a proxy.
       proxy_tensor_ =
-          new Tensor(ProcessState::singleton()->GetCUDAHostAllocator(0),
+          new Tensor(GPUProcessState::singleton()->GetCUDAHostAllocator(0),
                      result_tensor_->dtype(), result_tensor_->shape());
       rdma_addr_ = DMAHelper::base(proxy_tensor_);
       mr_ =
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 369bd986df..9cb3d1fbbf 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -21,8 +21,9 @@ limitations under the License.
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
+#include "tensorflow/core/common_runtime/process_state.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.h"
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/allocator_registry.h"
@@ -282,7 +283,7 @@ void RdmaMgr::InitAllocators() {
 
   Allocator* allocators[] = {
 #if GOOGLE_CUDA
-    ProcessState::singleton()->GetCUDAHostAllocator(0),
+    GPUProcessState::singleton()->GetCUDAHostAllocator(0),
     ProcessState::singleton()->GetCPUAllocator(0),
 #endif  // GOOGLE_CUDA
     cpu_allocator(),
@@ -323,7 +324,8 @@ void RdmaMgr::InitAllocators() {
         std::bind(&RdmaMemoryMgr::InsertMemoryRegion,
                   &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf));
 
-    ProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
     LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
   }
 #endif  // GOOGLE_CUDA
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-07-02 13:26:45 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-07-02 13:29:45 -0700
commit	c1560f0b86cbe1059b091942f18100d993018c5f (patch)
tree	b83e259384ee22b27accf3968e5b2b232b8c4135 /tensorflow/contrib/verbs
parent	8b53bfe12f2cc45cbccf0d1ffcf6150e89fdc97f (diff)