aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-07-02 13:26:45 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-07-02 13:29:45 -0700
commitc1560f0b86cbe1059b091942f18100d993018c5f (patch)
treeb83e259384ee22b27accf3968e5b2b232b8c4135 /tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
parent8b53bfe12f2cc45cbccf0d1ffcf6150e89fdc97f (diff)
Refactor ProcessState in support of NUMA.
ProcessState is a singleton that anchors per-process resources. Up until now, that meant only GPU-related memory allocators, since CPU allocation was usually done directly from Allocator::cpu_allocator. Accordingly, process_state.h was in common_runtime/gpu and ProcessState was only used in GPU builds. With the upcoming introduction of NUMA-node-specific CPU allocators, it will be important that most of the TF runtime switch to requesting the proper NUMA-specific CPU allocator. These allocators will be owned by and obtained from the ProcessState singleton, which will exist in all builds. The GPU-specific functions are moved to a new GPUProcessState, also a singleton. PoolAllocator is also migrated out of common_runtime/gpu into common_runtime. PiperOrigin-RevId: 203002666
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_device_factory.cc')
-rw-r--r--tensorflow/core/common_runtime/gpu/gpu_device_factory.cc7
1 files changed, 4 insertions, 3 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
index 9a000749c6..e1aaf95df6 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
@@ -19,7 +19,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/gpu/gpu_device.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/common_runtime/gpu/process_state.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/common_runtime/threadpool_device.h"
namespace tensorflow {
@@ -40,9 +40,10 @@ class GPUDevice : public BaseGPUDevice {
}
Allocator* GetAllocator(AllocatorAttributes attr) override {
+ CHECK(cpu_allocator_) << "bad place 1";
if (attr.on_host()) {
if (attr.gpu_compatible() || force_gpu_compatible_) {
- ProcessState* ps = ProcessState::singleton();
+ GPUProcessState* ps = GPUProcessState::singleton();
return ps->GetCUDAHostAllocator(0);
} else {
return cpu_allocator_;
@@ -90,7 +91,7 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice {
~GPUCompatibleCPUDevice() override {}
Allocator* GetAllocator(AllocatorAttributes attr) override {
- ProcessState* ps = ProcessState::singleton();
+ GPUProcessState* ps = GPUProcessState::singleton();
if (attr.gpu_compatible() || force_gpu_compatible_) {
return ps->GetCUDAHostAllocator(0);
} else {