1 files changed, 147 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_init.cc b/tensorflow/core/common_runtime/gpu/gpu_init.cc
new file mode 100644
index 0000000000..631a47eb91
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_init.cc
@@ -0,0 +1,147 @@
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+
+#include <string>
+
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/stream_executor/multi_platform_manager.h"
+#include "tensorflow/stream_executor/stream_executor.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+namespace gpu = ::perftools::gputools;
+
+namespace tensorflow {
+
+namespace {
+
+std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(
+    gpu::Platform* platform, int device_count) {
+  auto* map = new std::map<std::pair<int, int>, bool>;
+  for (int i = 0; i < device_count; ++i) {
+    for (int j = 0; j < device_count; ++j) {
+      gpu::StreamExecutor* from = platform->ExecutorForDevice(i).ValueOrDie();
+      gpu::StreamExecutor* to = platform->ExecutorForDevice(j).ValueOrDie();
+      (*map)[{i, j}] = from->CanEnablePeerAccessTo(to);
+    }
+  }
+
+  return std::unique_ptr<std::map<std::pair<int, int>, bool>>{map};
+}
+
+Status EnablePeerAccess(gpu::Platform* platform, int device_count) {
+  for (int i = 0; i < device_count; ++i) {
+    for (int j = 0; j < device_count; ++j) {
+      gpu::StreamExecutor* from = platform->ExecutorForDevice(i).ValueOrDie();
+      gpu::StreamExecutor* to = platform->ExecutorForDevice(j).ValueOrDie();
+
+      if (from->CanEnablePeerAccessTo(to)) {
+        auto status = from->EnablePeerAccessTo(to);
+        if (!status.ok()) {
+          return errors::Internal(status.ToString());
+        }
+      } else {
+        LOG(INFO) << "cannot enable peer access from device ordinal " << i
+                  << " to device ordinal " << j;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+static void InitGPU() {
+  auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA");
+  if (!result.ok()) {
+    LOG(WARNING)
+        << "Not initializing the GPU, could not create GPU MachineManager. "
+        << "Error: " << result.status();
+    return;
+  }
+
+  gpu::Platform* platform = result.ValueOrDie();
+
+  int dev_count = platform->VisibleDeviceCount();
+
+  if (dev_count == 0) {
+    LOG(INFO) << "No GPU devices available on machine.";
+    return;
+  }
+
+  for (int i = 0; i < dev_count; ++i) {
+    auto stream_exec = platform->ExecutorForDevice(i).ValueOrDie();
+    int64 free_bytes;
+    int64 total_bytes;
+    if (!stream_exec->DeviceMemoryUsage(&free_bytes, &total_bytes)) {
+      // Logs internally on failure.
+      free_bytes = 0;
+      total_bytes = 0;
+    }
+    const auto& description = stream_exec->GetDeviceDescription();
+    int cc_major;
+    int cc_minor;
+    if (!description.cuda_compute_capability(&cc_major, &cc_minor)) {
+      // Logs internally on failure.
+      cc_major = 0;
+      cc_minor = 0;
+    }
+    LOG(INFO) << "Found device " << i << " with properties: "
+              << "\nname: " << description.name() << "\nmajor: " << cc_major
+              << " minor: " << cc_minor << " memoryClockRate (GHz) "
+              << description.clock_rate_ghz() << "\npciBusID "
+              << description.pci_bus_id() << "\nTotal memory: "
+              << strings::HumanReadableNumBytes(total_bytes)
+              << "\nFree memory: "
+              << strings::HumanReadableNumBytes(free_bytes);
+  }
+
+  // Enable peer access
+
+  auto status = EnablePeerAccess(platform, dev_count);
+  if (!status.ok()) {
+    LOG(FATAL) << "could not enable peer access for GPU devices: " << status;
+  }
+
+  // Print out a matrix showing which devices can DMA to one
+  // another.
+  auto access_map = GetPeerAccessMap(platform, dev_count);
+  string line_buf = "DMA: ";
+  for (int i = 0; i < dev_count; ++i) {
+    strings::StrAppend(&line_buf, i, " ");
+  }
+  LOG(INFO) << line_buf;
+  for (int i = 0; i < dev_count; ++i) {
+    line_buf = strings::StrCat(i, ":   ");
+    for (int j = 0; j < dev_count; ++j) {
+      if ((*access_map)[{i, j}]) {
+        line_buf.append("Y ");
+      } else {
+        line_buf.append("N ");
+      }
+    }
+    LOG(INFO) << line_buf;
+  }
+}
+
+static bool InitModule() {
+  InitGPU();
+  return true;
+}
+
+}  // namespace
+
+gpu::Platform* GPUMachineManager() {
+  // Create the machine manager singleton and initialize the GPUs only
+  // once.
+  static bool init = InitModule();
+  CHECK(init);  // Avoids compiler warning that init is unused.
+
+  auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA");
+  if (!result.ok()) {
+    return nullptr;
+  }
+
+  return result.ValueOrDie();
+}
+
+}  // namespace tensorflow