aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/common_runtime/gpu/process_state.cc
diff options
context:
space:
mode:
author: Vijay Vasudevan <vrv@google.com> 2016-09-02 19:32:03 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-09-02 20:47:22 -0700
commit 5a96e4e672109b29951d2f65fcf46b00d85e025a (patch)
tree fa74fe697299f7e0da2ff03008dead2fc954d1da /tensorflow/core/common_runtime/gpu/process_state.cc
parent f504644d1554471c0af5184793ee46c419b849b1 (diff)
Make CUDA host allocator fetch one of the available stream executors
in the process, not the 0th one, which may not be visible to the process.
Fixes #1888 (for real this time?)
Change: 132128469
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/process_state.cc')
-rw-r--r-- tensorflow/core/common_runtime/gpu/process_state.cc 23
1 file changed, 18 insertions, 5 deletions
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index f85b37cb8f..60da115988 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -181,12 +181,25 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
// different numa_nodes. For now, just one.
numa_node = 0;
mutex_lock lock(mu_);
+
+ // Find the first valid StreamExecutor to request CUDA host memory
+ // through, since any will work.
+ //
+ // This search isn't super clean, and it would be nice to use a
+ // better source of information about which executor to use. For
+ // example, process_state could maybe save the first stream executor
+ // it knows is valid.
+ gpu::StreamExecutor* se = nullptr;
+ for (size_t i = 0; i < gpu_allocators_.size(); ++i) {
+ if (gpu_allocators_[i] != nullptr) {
+ se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie();
+ break;
+ }
+ }
+
+ CHECK_NE(nullptr, se);
+
while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
- // CUDAHost alloc the same across all gpus, so just get the
- // executor for the first device.
- gpu::Platform* gpu_platform = GPUMachineManager();
- gpu::StreamExecutor* se = gpu_platform->ExecutorForDevice(0).ValueOrDie();
- CHECK(se);
Allocator* allocator = nullptr;
static constexpr bool kCudaHostMemoryUseBFC = true;
if (kCudaHostMemoryUseBFC) {