2 files changed, 109 insertions, 5 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 10f6d21d54..124d5905b9 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -24,12 +24,17 @@ limitations under the License.
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef __APPLE__
+#include <IOKit/kext/KextManager.h>
+#include <mach-o/dyld.h>
+#else
 #if !defined(PLATFORM_WINDOWS)
 #include <link.h>
 #include <sys/sysmacros.h>
 #include <unistd.h>
 #endif
 #include <sys/stat.h>
+#endif
 #include <algorithm>
 #include <memory>
 #include <vector>
@@ -49,7 +54,9 @@ limitations under the License.
 namespace stream_executor {
 namespace cuda {
 
-#if !defined(PLATFORM_WINDOWS)
+#ifdef __APPLE__
+static const CFStringRef kDriverKextIdentifier = CFSTR("com.nvidia.CUDA");
+#elif !defined(PLATFORM_WINDOWS)
 static const char *kDriverVersionPath = "/proc/driver/nvidia/version";
 #endif
 
@@ -114,7 +121,31 @@ string Diagnostician::GetDevNodePath(int dev_node_ordinal) {
 }
 
 void Diagnostician::LogDiagnosticInformation() {
-#if !defined(PLATFORM_WINDOWS)
+#ifdef __APPLE__
+  // Look up the CUDA kext; null CoreFoundation results are handled as absent.
+  CFStringRef kext_ids[1] = {kDriverKextIdentifier};
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1,
+                                           &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos =
+      KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (kext_infos != nullptr &&
+      CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier,
+                                    (const void **)&cuda_driver_info)) {
+    CFBooleanRef started = (CFBooleanRef)CFDictionaryGetValue(
+        cuda_driver_info, CFSTR("OSBundleStarted"));
+    if (started == nullptr || !CFBooleanGetValue(started)) {
+      LOG(INFO) << "kernel driver is installed, but does not appear to be "
+                   "running on this host "
+                << "(" << port::Hostname() << ")";
+    }
+  } else {
+    LOG(INFO) << "kernel driver does not appear to be installed on this host "
+              << "(" << port::Hostname() << ")";
+  }
+  if (kext_infos != nullptr) CFRelease(kext_infos);  // CFRelease(NULL) traps
+#elif !defined(PLATFORM_WINDOWS)
   if (access(kDriverVersionPath, F_OK) != 0) {
     LOG(INFO) << "kernel driver does not appear to be running on this host "
               << "(" << port::Hostname() << "): "
@@ -168,7 +199,8 @@ void Diagnostician::LogDiagnosticInformation() {
 	  << DriverVersionStatusToString(kernel_version);
 #endif
 
-#if !defined(PLATFORM_WINDOWS)
+  // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS)
   if (kernel_version.ok() && dso_version.ok()) {
     WarnOnDsoKernelMismatch(dso_version, kernel_version);
   }
@@ -182,6 +214,29 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
       port::error::NOT_FOUND,
       "was unable to find libcuda.so DSO loaded into this program"));
 
+#if defined(__APPLE__)
+  // OSX CUDA libraries have names like: libcuda_310.41.15_mercury.dylib
+  // Every loaded image is scanned; `result` is overwritten on each match.
+  const string prefix("libcuda_");
+  const string suffix("_mercury.dylib");
+  for (uint32_t image_index = 0; image_index < _dyld_image_count();
+       ++image_index) {
+    // _dyld_get_image_name yields NULL when the index is no longer valid
+    // (e.g. an image was unloaded mid-scan); string(NULL) would be undefined.
+    const char *image_name = _dyld_get_image_name(image_index);
+    if (image_name == nullptr) continue;
+    const string path(image_name);
+
+    // Locate the last suffix, then the last prefix at or before it.
+    const size_t suffix_pos = path.rfind(suffix);
+    const size_t prefix_pos = path.rfind(prefix, suffix_pos);
+    if (prefix_pos == string::npos || suffix_pos == string::npos) continue;
+    // The version is the text strictly between the prefix and the suffix.
+    const size_t start = prefix_pos + prefix.size();
+    if (start >= suffix_pos) continue;  // version not included
+    result = StringToDriverVersion(path.substr(start, suffix_pos - start));
+  }
+#else
 #if !defined(PLATFORM_WINDOWS) && !defined(ANDROID_TEGRA)
   // Callback used when iterating through DSOs. Looks for the driver-interfacing
   // DSO and yields its version number into the callback data, when found.
@@ -215,6 +270,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
 
   dl_iterate_phdr(iterate_phdr, &result);
 #endif
+#endif
 
   return result;
 }
@@ -259,7 +315,41 @@ void Diagnostician::WarnOnDsoKernelMismatch(
 
 
 port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
-#if defined(PLATFORM_WINDOWS)
+#if defined(__APPLE__)
+  // Ask the kext manager for the loaded-kext record of the CUDA driver only.
+  CFStringRef kext_ids[1] = {kDriverKextIdentifier};
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1,
+                                           &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos =
+      KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (kext_infos != nullptr &&
+      CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier,
+                                    (const void **)&cuda_driver_info)) {
+    // NOTE: OSX CUDA driver does not currently store the same driver version
+    // in kCFBundleVersionKey as is returned by cuDriverGetVersion
+    const CFStringRef str = (CFStringRef)CFDictionaryGetValue(
+        cuda_driver_info, kCFBundleVersionKey);
+    // CFStringGetCStringPtr may return NULL; treat it as an empty version, see
+    // https://developer.apple.com/library/mac/documentation/CoreFoundation/Conceptual/CFStrings/Articles/AccessingContents.html#//apple_ref/doc/uid/20001184-100980-TPXREF112
+    const char *raw =
+        str ? CFStringGetCStringPtr(str, kCFStringEncodingUTF8) : nullptr;
+    // cuda_driver_info and raw are borrowed from kext_infos, so copy the text
+    // out before the dictionary is released.
+    const string version(raw != nullptr ? raw : "");
+    CFRelease(kext_infos);
+    return StringToDriverVersion(version);
+  }
+  if (kext_infos != nullptr) CFRelease(kext_infos);  // CFRelease(NULL) traps
+  const char *kext_name =
+      CFStringGetCStringPtr(kDriverKextIdentifier, kCFStringEncodingUTF8);
+  return port::Status(
+      port::error::INTERNAL,
+      port::StrCat("failed to read driver bundle version: ",
+                   kext_name != nullptr ? kext_name : "com.nvidia.CUDA"));
+#elif defined(PLATFORM_WINDOWS)
   auto status =
       port::Status(port::error::UNIMPLEMENTED,
                    "kernel reported driver version not implemented on Windows");
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index edf217875f..f11022ef1d 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -15,6 +15,9 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
 
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
 #if defined(PLATFORM_WINDOWS)
 #include <windows.h>
 #define PATH_MAX MAX_PATH
@@ -176,12 +179,20 @@ bool CUDAExecutor::FindOnDiskForComputeCapability(
 //                 would return /usr/bin.
 static string GetBinaryDir(bool strip_exe) {
   char exe_path[PATH_MAX] = {0};
+#if defined(__APPLE__)
+  uint32_t buffer_size = 0U;
+  _NSGetExecutablePath(nullptr, &buffer_size);  // only reports needed size
+  string unresolved_path(buffer_size, '\0');  // heap buffer instead of a VLA
+  CHECK_ERR(_NSGetExecutablePath(&unresolved_path[0], &buffer_size));
+  CHECK_ERR(realpath(unresolved_path.c_str(), exe_path) ? 1 : -1);
+#else
 #if defined(PLATFORM_WINDOWS)
   HMODULE hModule = GetModuleHandle(NULL);
   GetModuleFileName(hModule, exe_path, MAX_PATH);
 #else
   CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
 #endif
+#endif
   // Make sure it's null-terminated:
   exe_path[sizeof(exe_path) - 1] = 0;
 
@@ -843,7 +854,10 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
 // For anything more complicated/prod-focused than this, you'll likely want to
 // turn to gsys' topology modeling.
 static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
-#if defined(PLATFORM_WINDOWS)
+#if defined(__APPLE__)
+  LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
+  return 0;
+#elif defined(PLATFORM_WINDOWS)
   // Windows support for NUMA is not currently implemented. Return node 0.
   return 0;
 #elif defined(__aarch64__)