diff options
author | Vijay Vasudevan <vrv@google.com> | 2016-09-02 19:32:03 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-09-02 20:47:22 -0700 |
commit | 5a96e4e672109b29951d2f65fcf46b00d85e025a (patch) | |
tree | fa74fe697299f7e0da2ff03008dead2fc954d1da /tensorflow/core/common_runtime/gpu/process_state.cc | |
parent | f504644d1554471c0af5184793ee46c419b849b1 (diff) |
Make CUDA host allocator fetch one of the available stream executors
in the process, not the 0th one, which may not be visible to the process.
Fixes #1888 (for real this time?)
Change: 132128469
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/process_state.cc')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/process_state.cc | 23 |
1 file changed, 18 insertions, 5 deletions
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc index f85b37cb8f..60da115988 100644 --- a/tensorflow/core/common_runtime/gpu/process_state.cc +++ b/tensorflow/core/common_runtime/gpu/process_state.cc @@ -181,12 +181,25 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) { // different numa_nodes. For now, just one. numa_node = 0; mutex_lock lock(mu_); + + // Find the first valid StreamExecutor to request CUDA host memory + // through, since any will work. + // + // This search isn't super clean, and it would be nice to use a + // better source of information about which executor to use. For + // example, process_state could maybe save the first stream executor + // it knows is valid. + gpu::StreamExecutor* se = nullptr; + for (size_t i = 0; i < gpu_allocators_.size(); ++i) { + if (gpu_allocators_[i] != nullptr) { + se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie(); + break; + } + } + + CHECK_NE(nullptr, se); + while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) { - // CUDAHost alloc the same across all gpus, so just get the - // executor for the first device. - gpu::Platform* gpu_platform = GPUMachineManager(); - gpu::StreamExecutor* se = gpu_platform->ExecutorForDevice(0).ValueOrDie(); - CHECK(se); Allocator* allocator = nullptr; static constexpr bool kCudaHostMemoryUseBFC = true; if (kCudaHostMemoryUseBFC) { |