diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-07-02 13:26:45 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-02 13:29:45 -0700 |
commit | c1560f0b86cbe1059b091942f18100d993018c5f (patch) | |
tree | b83e259384ee22b27accf3968e5b2b232b8c4135 /tensorflow/contrib/gdr | |
parent | 8b53bfe12f2cc45cbccf0d1ffcf6150e89fdc97f (diff) |
Refactor ProcessState in support of NUMA.
ProcessState is a singleton that anchors per-process resources.
Up until now that meant only GPU-related memory allocators
since CPU allocation was usually done directly from Allocator::cpu_allocator.
Accordingly process_state.h was in common_runtime/gpu and ProcessState
was only used in GPU builds.
With the upcoming introduction of NUMA node specific CPU allocators
it will be important that most of the TF runtime switch to requesting the
proper NUMA-specific CPU allocator. These allocators will be owned by
and obtained from the ProcessState singleton which will exist in all
builds. The GPU-specific functions are moved to a new
GPUProcessState, also a singleton.
PoolAllocator is also migrated out of common_runtime/gpu into common_runtime.
PiperOrigin-RevId: 203002666
Diffstat (limited to 'tensorflow/contrib/gdr')
-rw-r--r-- | tensorflow/contrib/gdr/gdr_memory_manager.cc | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index 81e70ae30a..1435e19109 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -34,8 +34,9 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/dma_helper.h" #if GOOGLE_CUDA +#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" -#include "tensorflow/core/common_runtime/gpu/process_state.h" +#include "tensorflow/core/common_runtime/process_state.h" #endif // GOOGLE_CUDA #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" @@ -274,7 +275,7 @@ Status GdrMemoryManager::Init() { Allocator* allocators[] = { #if GOOGLE_CUDA - ProcessState::singleton()->GetCUDAHostAllocator(0), + GPUProcessState::singleton()->GetCUDAHostAllocator(0), ProcessState::singleton()->GetCPUAllocator(0), #endif // GOOGLE_CUDA cpu_allocator(), @@ -308,7 +309,8 @@ Status GdrMemoryManager::Init() { if (IsGDRAvailable()) { // Note we don't free allocated GPU memory so there is no free visitor int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1; - ProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor); + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; } #endif // GOOGLE_CUDA @@ -430,7 +432,7 @@ void GdrMemoryManager::TransportOptionsFromTensor( #if GOOGLE_CUDA if (!on_host) { - Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); + Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape()); GPUUtil::CopyGPUTensorToCPU( device, device_context, &tensor, host_copy, @@ -532,7 +534,7 @@ void 
GdrMemoryManager::TensorFromTransportOptions( Tensor host_copy; #if GOOGLE_CUDA if (mr == nullptr && !on_host) { - Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); + Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); buffer = DMAHelper::buffer(&host_copy); addr = buffer->data(); |