diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_init.cc')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_init.cc | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_init.cc b/tensorflow/core/common_runtime/gpu/gpu_init.cc new file mode 100644 index 0000000000..631a47eb91 --- /dev/null +++ b/tensorflow/core/common_runtime/gpu/gpu_init.cc @@ -0,0 +1,147 @@ +#include "tensorflow/core/common_runtime/gpu/gpu_init.h" + +#include <string> + +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace gpu = ::perftools::gputools; + +namespace tensorflow { + +namespace { + +std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap( + gpu::Platform* platform, int device_count) { + auto* map = new std::map<std::pair<int, int>, bool>; + for (int i = 0; i < device_count; ++i) { + for (int j = 0; j < device_count; ++j) { + gpu::StreamExecutor* from = platform->ExecutorForDevice(i).ValueOrDie(); + gpu::StreamExecutor* to = platform->ExecutorForDevice(j).ValueOrDie(); + (*map)[{i, j}] = from->CanEnablePeerAccessTo(to); + } + } + + return std::unique_ptr<std::map<std::pair<int, int>, bool>>{map}; +} + +Status EnablePeerAccess(gpu::Platform* platform, int device_count) { + for (int i = 0; i < device_count; ++i) { + for (int j = 0; j < device_count; ++j) { + gpu::StreamExecutor* from = platform->ExecutorForDevice(i).ValueOrDie(); + gpu::StreamExecutor* to = platform->ExecutorForDevice(j).ValueOrDie(); + + if (from->CanEnablePeerAccessTo(to)) { + auto status = from->EnablePeerAccessTo(to); + if (!status.ok()) { + return errors::Internal(status.ToString()); + } + } else { + LOG(INFO) << "cannot enable peer access from device ordinal " << i + << " to device ordinal " << j; + } + } + } + return Status::OK(); +} + +static void InitGPU() { + auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); + if (!result.ok()) { + LOG(WARNING) + << "Not initializing the GPU, could not create GPU MachineManager. " + << "Error: " << result.status(); + return; + } + + gpu::Platform* platform = result.ValueOrDie(); + + int dev_count = platform->VisibleDeviceCount(); + + if (dev_count == 0) { + LOG(INFO) << "No GPU devices available on machine."; + return; + } + + for (int i = 0; i < dev_count; ++i) { + auto stream_exec = platform->ExecutorForDevice(i).ValueOrDie(); + int64 free_bytes; + int64 total_bytes; + if (!stream_exec->DeviceMemoryUsage(&free_bytes, &total_bytes)) { + // Logs internally on failure. + free_bytes = 0; + total_bytes = 0; + } + const auto& description = stream_exec->GetDeviceDescription(); + int cc_major; + int cc_minor; + if (!description.cuda_compute_capability(&cc_major, &cc_minor)) { + // Logs internally on failure. + cc_major = 0; + cc_minor = 0; + } + LOG(INFO) << "Found device " << i << " with properties: " + << "\nname: " << description.name() << "\nmajor: " << cc_major + << " minor: " << cc_minor << " memoryClockRate (GHz) " + << description.clock_rate_ghz() << "\npciBusID " + << description.pci_bus_id() << "\nTotal memory: " + << strings::HumanReadableNumBytes(total_bytes) + << "\nFree memory: " + << strings::HumanReadableNumBytes(free_bytes); + } + + // Enable peer access + + auto status = EnablePeerAccess(platform, dev_count); + if (!status.ok()) { + LOG(FATAL) << "could not enable peer access for GPU devices: " << status; + } + + // Print out a matrix showing which devices can DMA to one + // another. + auto access_map = GetPeerAccessMap(platform, dev_count); + string line_buf = "DMA: "; + for (int i = 0; i < dev_count; ++i) { + strings::StrAppend(&line_buf, i, " "); + } + LOG(INFO) << line_buf; + for (int i = 0; i < dev_count; ++i) { + line_buf = strings::StrCat(i, ": "); + for (int j = 0; j < dev_count; ++j) { + if ((*access_map)[{i, j}]) { + line_buf.append("Y "); + } else { + line_buf.append("N "); + } + } + LOG(INFO) << line_buf; + } +} + +static bool InitModule() { + InitGPU(); + return true; +} + +} // namespace + +gpu::Platform* GPUMachineManager() { + // Create the machine manager singleton and initialize the GPUs only + // once. + static bool init = InitModule(); + CHECK(init); // Avoids compiler warning that init is unused. + + auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); + if (!result.ok()) { + return nullptr; + } + + return result.ValueOrDie(); +} + +} // namespace tensorflow |