about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/cuda
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_diagnostics.cc 98
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc 16
2 files changed, 109 insertions, 5 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 10f6d21d54..124d5905b9 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -24,12 +24,17 @@ limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#ifdef __APPLE__
+#include <IOKit/kext/KextManager.h>
+#include <mach-o/dyld.h>
+#else
#if !defined(PLATFORM_WINDOWS)
#include <link.h>
#include <sys/sysmacros.h>
#include <unistd.h>
#endif
#include <sys/stat.h>
+#endif
#include <algorithm>
#include <memory>
#include <vector>
@@ -49,7 +54,9 @@ limitations under the License.
namespace stream_executor {
namespace cuda {
-#if !defined(PLATFORM_WINDOWS)
+#ifdef __APPLE__
+static const CFStringRef kDriverKextIdentifier = CFSTR("com.nvidia.CUDA");
+#elif !defined(PLATFORM_WINDOWS)
static const char *kDriverVersionPath = "/proc/driver/nvidia/version";
#endif
@@ -114,7 +121,31 @@ string Diagnostician::GetDevNodePath(int dev_node_ordinal) {
}
void Diagnostician::LogDiagnosticInformation() {
-#if !defined(PLATFORM_WINDOWS)
+#ifdef __APPLE__
+  // Ask the kext manager for the loaded-kext record of the CUDA driver only.
+  CFStringRef kext_ids[1];
+  kext_ids[0] = kDriverKextIdentifier;
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1,
+                                           &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos =
+      KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+
+  // NOTE(review): the kext query is assumed able to fail and return NULL.
+  // CoreFoundation accessors (and CFRelease) must not be handed NULL, so that
+  // case is reported the same way as a driver that is not installed.
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (kext_infos != nullptr &&
+      CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier,
+                                    (const void **)&cuda_driver_info)) {
+    // A record lacking "OSBundleStarted" is treated as "not running" instead
+    // of passing NULL to CFBooleanGetValue.
+    const CFBooleanRef started_ref = (CFBooleanRef)CFDictionaryGetValue(
+        cuda_driver_info, CFSTR("OSBundleStarted"));
+    const bool started =
+        started_ref != nullptr && CFBooleanGetValue(started_ref);
+    if (!started) {
+      LOG(INFO) << "kernel driver is installed, but does not appear to be "
+                   "running on this host "
+                << "(" << port::Hostname() << ")";
+    }
+  } else {
+    LOG(INFO) << "kernel driver does not appear to be installed on this host "
+              << "(" << port::Hostname() << ")";
+  }
+  // cuda_driver_info is borrowed from kext_infos, so the dictionary is only
+  // released once all reads above are finished.
+  if (kext_infos != nullptr) {
+    CFRelease(kext_infos);
+  }
+#elif !defined(PLATFORM_WINDOWS)
if (access(kDriverVersionPath, F_OK) != 0) {
LOG(INFO) << "kernel driver does not appear to be running on this host "
<< "(" << port::Hostname() << "): "
@@ -168,7 +199,8 @@ void Diagnostician::LogDiagnosticInformation() {
<< DriverVersionStatusToString(kernel_version);
#endif
-#if !defined(PLATFORM_WINDOWS)
+ // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS)
if (kernel_version.ok() && dso_version.ok()) {
WarnOnDsoKernelMismatch(dso_version, kernel_version);
}
@@ -182,6 +214,29 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
port::error::NOT_FOUND,
"was unable to find libcuda.so DSO loaded into this program"));
+#if defined(__APPLE__)
+  // OSX CUDA libraries have names like: libcuda_310.41.15_mercury.dylib
+  // Scan every image loaded into this process and extract the version text
+  // that sits between the prefix and the suffix of a matching file name.
+  const string prefix("libcuda_");
+  const string suffix("_mercury.dylib");
+  for (uint32_t image_index = 0; image_index < _dyld_image_count();
+       ++image_index) {
+    // dyld hands back NULL when the index is out of range, which can happen
+    // if an image is unloaded while we iterate; std::string must never be
+    // constructed from a null pointer.
+    const char *image_name = _dyld_get_image_name(image_index);
+    if (image_name == nullptr) {
+      continue;
+    }
+    const string path(image_name);
+    const size_t suffix_pos = path.rfind(suffix);
+    if (suffix_pos == string::npos) {
+      // no match
+      continue;
+    }
+    // Only look for the prefix to the left of the suffix.
+    const size_t prefix_pos = path.rfind(prefix, suffix_pos);
+    if (prefix_pos == string::npos) {
+      // no match
+      continue;
+    }
+    const size_t start = prefix_pos + prefix.size();
+    if (start >= suffix_pos) {
+      // version not included
+      continue;
+    }
+    const size_t length = suffix_pos - start;
+    const string version = path.substr(start, length);
+    result = StringToDriverVersion(version);
+  }
+#else
#if !defined(PLATFORM_WINDOWS) && !defined(ANDROID_TEGRA)
// Callback used when iterating through DSOs. Looks for the driver-interfacing
// DSO and yields its version number into the callback data, when found.
@@ -215,6 +270,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
dl_iterate_phdr(iterate_phdr, &result);
#endif
+#endif
return result;
}
@@ -259,7 +315,41 @@ void Diagnostician::WarnOnDsoKernelMismatch(
port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
-#if defined(PLATFORM_WINDOWS)
+#if defined(__APPLE__)
+  // Look up the loaded-kext record for the CUDA driver and parse its bundle
+  // version; yields an INTERNAL status when no such record can be read.
+  CFStringRef kext_ids[1];
+  kext_ids[0] = kDriverKextIdentifier;
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1,
+                                           &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos =
+      KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+
+  // NOTE(review): the kext query is assumed able to fail and return NULL;
+  // that case falls through to the error status below.
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (kext_infos != nullptr &&
+      CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier,
+                                    (const void **)&cuda_driver_info)) {
+    // NOTE: OSX CUDA driver does not currently store the same driver version
+    // in kCFBundleVersionKey as is returned by cuDriverGetVersion
+    const CFStringRef str = (CFStringRef)CFDictionaryGetValue(
+        cuda_driver_info, kCFBundleVersionKey);
+
+    // version can be NULL in which case treat it as empty string
+    // see
+    // https://developer.apple.com/library/mac/documentation/CoreFoundation/Conceptual/CFStrings/Articles/AccessingContents.html#//apple_ref/doc/uid/20001184-100980-TPXREF112
+    const char *version =
+        str != nullptr ? CFStringGetCStringPtr(str, kCFStringEncodingUTF8)
+                       : nullptr;
+
+    // cuda_driver_info, str and version all borrow storage owned by
+    // kext_infos, so the version must be parsed BEFORE the dictionary is
+    // released; releasing first would read freed memory.
+    port::StatusOr<DriverVersion> result =
+        StringToDriverVersion(version == nullptr ? "" : version);
+    CFRelease(kext_infos);
+    return result;
+  }
+  if (kext_infos != nullptr) {
+    CFRelease(kext_infos);
+  }
+  auto status = port::Status(
+      port::error::INTERNAL,
+      port::StrCat(
+          "failed to read driver bundle version: ",
+          CFStringGetCStringPtr(kDriverKextIdentifier, kCFStringEncodingUTF8)));
+  return status;
+#elif defined(PLATFORM_WINDOWS)
auto status =
port::Status(port::error::UNIMPLEMENTED,
"kernel reported driver version not implemented on Windows");
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index edf217875f..f11022ef1d 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -15,6 +15,9 @@ limitations under the License.
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
#if defined(PLATFORM_WINDOWS)
#include <windows.h>
#define PATH_MAX MAX_PATH
@@ -176,12 +179,20 @@ bool CUDAExecutor::FindOnDiskForComputeCapability(
// would return /usr/bin.
static string GetBinaryDir(bool strip_exe) {
char exe_path[PATH_MAX] = {0};
+#if defined(__APPLE__)
+  // Probe with a null buffer first: the call reports the size it needs
+  // through buffer_size.
+  uint32_t buffer_size = 0U;
+  _NSGetExecutablePath(nullptr, &buffer_size);
+  // Heap-backed storage instead of a variable-length array, which is a
+  // compiler extension rather than standard C++. The one spare byte keeps the
+  // buffer NUL-terminated regardless of how the reported size is counted.
+  string unresolved_path(buffer_size + 1, '\0');
+  _NSGetExecutablePath(&unresolved_path[0], &buffer_size);
+  // The reported path may contain symlinks or relative components; resolve it
+  // into exe_path and abort via CHECK_ERR if resolution fails.
+  CHECK_ERR(realpath(unresolved_path.c_str(), exe_path) ? 1 : -1);
+#else
#if defined(PLATFORM_WINDOWS)
HMODULE hModule = GetModuleHandle(NULL);
GetModuleFileName(hModule, exe_path, MAX_PATH);
#else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif
+#endif
// Make sure it's null-terminated:
exe_path[sizeof(exe_path) - 1] = 0;
@@ -843,7 +854,10 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
// For anything more complicated/prod-focused than this, you'll likely want to
// turn to gsys' topology modeling.
static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
-#if defined(PLATFORM_WINDOWS)
+#if defined(__APPLE__)
+ LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
+ return 0;
+#elif defined(PLATFORM_WINDOWS)
// Windows support for NUMA is not currently implemented. Return node 0.
return 0;
#elif defined(__aarch64__)