Diffstat (limited to 'tensorflow/contrib/gdr/gdr_memory_manager.cc')
-rw-r--r--  tensorflow/contrib/gdr/gdr_memory_manager.cc  42
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index 81e70ae30a..f3bbf6b4d7 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -33,9 +33,11 @@ limitations under the License.
#include "tensorflow/core/common_runtime/bfc_allocator.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
+#include "tensorflow/core/common_runtime/pool_allocator.h"
+#include "tensorflow/core/common_runtime/process_state.h"
#if GOOGLE_CUDA
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
#endif // GOOGLE_CUDA
#include "tensorflow/core/framework/allocator_registry.h"
#include "tensorflow/core/lib/core/status.h"
@@ -181,28 +183,25 @@ class GdrMemoryManager : public RemoteMemoryManager {
TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager);
};
-// TODO(byronyi): remove this class duplicated from the one in
-// common/runtime/gpu/pool_allocator.h when it is available in common_runtime
-class BasicCPUAllocator : public SubAllocator {
- public:
- ~BasicCPUAllocator() override {}
-
- void* Alloc(size_t alignment, size_t num_bytes) override {
- return port::AlignedMalloc(num_bytes, alignment);
- }
- void Free(void* ptr, size_t) override { port::AlignedFree(ptr); }
-};
-
// TODO(byronyi): remove this class and its registration when the default
-// cpu_allocator() returns visitable allocator
+// cpu_allocator() returns visitable allocator, or cpu_allocator() is no
+// longer in use.
class BFCRdmaAllocator : public BFCAllocator {
public:
BFCRdmaAllocator()
- : BFCAllocator(new BasicCPUAllocator(), 1LL << 36, true, "cpu_rdma_bfc") {
+ : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36,
+ true, "cpu_rdma_bfc") {}
+};
+class BFCRdmaAllocatorFactory : public AllocatorFactory {
+ public:
+ Allocator* CreateAllocator() override { return new BFCRdmaAllocator; }
+
+ virtual SubAllocator* CreateSubAllocator(int numa_node) {
+ return new BasicCPUAllocator(numa_node);
}
};
-REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocator);
+REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
: host_(host),
@@ -274,9 +273,9 @@ Status GdrMemoryManager::Init() {
Allocator* allocators[] = {
#if GOOGLE_CUDA
- ProcessState::singleton()->GetCUDAHostAllocator(0),
- ProcessState::singleton()->GetCPUAllocator(0),
+ GPUProcessState::singleton()->GetCUDAHostAllocator(0),
#endif // GOOGLE_CUDA
+ ProcessState::singleton()->GetCPUAllocator(0),
cpu_allocator(),
};
@@ -308,7 +307,8 @@ Status GdrMemoryManager::Init() {
if (IsGDRAvailable()) {
// Note we don't free allocated GPU memory so there is no free visitor
int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1;
- ProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor);
+ GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+ cuda_alloc_visitor);
LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
}
#endif // GOOGLE_CUDA
@@ -430,7 +430,7 @@ void GdrMemoryManager::TransportOptionsFromTensor(
#if GOOGLE_CUDA
if (!on_host) {
- Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
+ Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape());
GPUUtil::CopyGPUTensorToCPU(
device, device_context, &tensor, host_copy,
@@ -532,7 +532,7 @@ void GdrMemoryManager::TensorFromTransportOptions(
Tensor host_copy;
#if GOOGLE_CUDA
if (mr == nullptr && !on_host) {
- Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
+ Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
buffer = DMAHelper::buffer(&host_copy);
addr = buffer->data();
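
Note on the registration change above: the old code registered the BFCRdmaAllocator class directly with REGISTER_MEM_ALLOCATOR, while the new code registers a BFCRdmaAllocatorFactory that can produce both an Allocator and a per-NUMA-node SubAllocator (BasicCPUAllocator(numa_node)). The standalone sketch below illustrates that factory shape only; the simplified Allocator, SubAllocator, and AllocatorFactory interfaces and the SimpleCpu* names are hypothetical stand-ins, not TensorFlow's actual classes or headers.

    // Simplified, hypothetical stand-ins for TensorFlow's Allocator,
    // SubAllocator, and AllocatorFactory; only the overall shape matches
    // the pattern adopted in the diff above.
    #include <cstdlib>
    #include <iostream>

    class Allocator {
     public:
      virtual ~Allocator() {}
      virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0;
      virtual void DeallocateRaw(void* ptr) = 0;
    };

    class SubAllocator {
     public:
      virtual ~SubAllocator() {}
      virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
      virtual void Free(void* ptr, size_t num_bytes) = 0;
    };

    class AllocatorFactory {
     public:
      virtual ~AllocatorFactory() {}
      virtual Allocator* CreateAllocator() = 0;
      // The factory, not the allocator itself, is told which NUMA node
      // the sub-allocator should target.
      virtual SubAllocator* CreateSubAllocator(int numa_node) = 0;
    };

    // Hypothetical concrete types playing the roles of BFCRdmaAllocator
    // and BasicCPUAllocator(numa_node) from the diff.
    class SimpleCpuAllocator : public Allocator {
     public:
      void* AllocateRaw(size_t alignment, size_t num_bytes) override {
        return std::aligned_alloc(alignment, num_bytes);
      }
      void DeallocateRaw(void* ptr) override { std::free(ptr); }
    };

    class SimpleCpuSubAllocator : public SubAllocator {
     public:
      explicit SimpleCpuSubAllocator(int numa_node) : numa_node_(numa_node) {}
      void* Alloc(size_t alignment, size_t num_bytes) override {
        // Real code would bind the allocation to numa_node_; omitted here.
        return std::aligned_alloc(alignment, num_bytes);
      }
      void Free(void* ptr, size_t) override { std::free(ptr); }

     private:
      int numa_node_;
    };

    class SimpleCpuAllocatorFactory : public AllocatorFactory {
     public:
      Allocator* CreateAllocator() override { return new SimpleCpuAllocator; }
      SubAllocator* CreateSubAllocator(int numa_node) override {
        return new SimpleCpuSubAllocator(numa_node);
      }
    };

    int main() {
      SimpleCpuAllocatorFactory factory;
      Allocator* a = factory.CreateAllocator();
      void* p = a->AllocateRaw(64, 1024);  // size is a multiple of alignment
      std::cout << "allocated at " << p << std::endl;
      a->DeallocateRaw(p);
      delete a;
      return 0;
    }

In TensorFlow itself the factory is what gets passed to REGISTER_MEM_ALLOCATOR (as in the diff's REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory)), so the registry can construct NUMA-aware sub-allocators on demand instead of a single fixed allocator instance.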