diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_diagnostics.cc | 98 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 16 |
2 files changed, 109 insertions, 5 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc index 10f6d21d54..124d5905b9 100644 --- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc +++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc @@ -24,12 +24,17 @@ limitations under the License. #include <stdio.h> #include <stdlib.h> #include <string.h> +#ifdef __APPLE__ +#include <IOKit/kext/KextManager.h> +#include <mach-o/dyld.h> +#else #if !defined(PLATFORM_WINDOWS) #include <link.h> #include <sys/sysmacros.h> #include <unistd.h> #endif #include <sys/stat.h> +#endif #include <algorithm> #include <memory> #include <vector> @@ -49,7 +54,9 @@ limitations under the License. namespace stream_executor { namespace cuda { -#if !defined(PLATFORM_WINDOWS) +#ifdef __APPLE__ +static const CFStringRef kDriverKextIdentifier = CFSTR("com.nvidia.CUDA"); +#elif !defined(PLATFORM_WINDOWS) static const char *kDriverVersionPath = "/proc/driver/nvidia/version"; #endif @@ -114,7 +121,31 @@ string Diagnostician::GetDevNodePath(int dev_node_ordinal) { } void Diagnostician::LogDiagnosticInformation() { -#if !defined(PLATFORM_WINDOWS) +#ifdef __APPLE__ + CFStringRef kext_ids[1]; + kext_ids[0] = kDriverKextIdentifier; + CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1, + &kCFTypeArrayCallBacks); + CFDictionaryRef kext_infos = + KextManagerCopyLoadedKextInfo(kext_id_query, nullptr); + CFRelease(kext_id_query); + + CFDictionaryRef cuda_driver_info = nullptr; + if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, + (const void **)&cuda_driver_info)) { + bool started = CFBooleanGetValue((CFBooleanRef)CFDictionaryGetValue( + cuda_driver_info, CFSTR("OSBundleStarted"))); + if (!started) { + LOG(INFO) << "kernel driver is installed, but does not appear to be " + "running on this host " + << "(" << port::Hostname() << ")"; + } + } else { + LOG(INFO) << "kernel driver does not appear to be installed on this host " + << "(" << port::Hostname() << ")"; + } + CFRelease(kext_infos); +#elif !defined(PLATFORM_WINDOWS) if (access(kDriverVersionPath, F_OK) != 0) { LOG(INFO) << "kernel driver does not appear to be running on this host " << "(" << port::Hostname() << "): " @@ -168,7 +199,8 @@ void Diagnostician::LogDiagnosticInformation() { << DriverVersionStatusToString(kernel_version); #endif -#if !defined(PLATFORM_WINDOWS) + // OS X kernel driver does not report version accurately +#if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS) if (kernel_version.ok() && dso_version.ok()) { WarnOnDsoKernelMismatch(dso_version, kernel_version); } @@ -182,6 +214,29 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() { port::error::NOT_FOUND, "was unable to find libcuda.so DSO loaded into this program")); +#if defined(__APPLE__) + // OSX CUDA libraries have names like: libcuda_310.41.15_mercury.dylib + const string prefix("libcuda_"); + const string suffix("_mercury.dylib"); + for (uint32_t image_index = 0; image_index < _dyld_image_count(); + ++image_index) { + const string path(_dyld_get_image_name(image_index)); + const size_t suffix_pos = path.rfind(suffix); + const size_t prefix_pos = path.rfind(prefix, suffix_pos); + if (prefix_pos == string::npos || suffix_pos == string::npos) { + // no match + continue; + } + const size_t start = prefix_pos + prefix.size(); + if (start >= suffix_pos) { + // version not included + continue; + } + const size_t length = suffix_pos - start; + const string version = path.substr(start, length); + result = StringToDriverVersion(version); + } +#else #if !defined(PLATFORM_WINDOWS) && !defined(ANDROID_TEGRA) // Callback used when iterating through DSOs. Looks for the driver-interfacing // DSO and yields its version number into the callback data, when found. @@ -215,6 +270,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() { dl_iterate_phdr(iterate_phdr, &result); #endif +#endif return result; } @@ -259,7 +315,41 @@ void Diagnostician::WarnOnDsoKernelMismatch( port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() { -#if defined(PLATFORM_WINDOWS) +#if defined(__APPLE__) + CFStringRef kext_ids[1]; + kext_ids[0] = kDriverKextIdentifier; + CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void **)kext_ids, 1, + &kCFTypeArrayCallBacks); + CFDictionaryRef kext_infos = + KextManagerCopyLoadedKextInfo(kext_id_query, nullptr); + CFRelease(kext_id_query); + + CFDictionaryRef cuda_driver_info = nullptr; + if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, + (const void **)&cuda_driver_info)) { + // NOTE: OSX CUDA driver does not currently store the same driver version + // in kCFBundleVersionKey as is returned by cuDriverGetVersion + CFRelease(kext_infos); + const CFStringRef str = (CFStringRef)CFDictionaryGetValue( + cuda_driver_info, kCFBundleVersionKey); + const char *version = CFStringGetCStringPtr(str, kCFStringEncodingUTF8); + + // version can be NULL in which case treat it as empty string + // see + // https://developer.apple.com/library/mac/documentation/CoreFoundation/Conceptual/CFStrings/Articles/AccessingContents.html#//apple_ref/doc/uid/20001184-100980-TPXREF112 + if (version == NULL) { + return StringToDriverVersion(""); + } + return StringToDriverVersion(version); + } + CFRelease(kext_infos); + auto status = port::Status( + port::error::INTERNAL, + port::StrCat( + "failed to read driver bundle version: ", + CFStringGetCStringPtr(kDriverKextIdentifier, kCFStringEncodingUTF8))); + return status; +#elif defined(PLATFORM_WINDOWS) auto status = port::Status(port::error::UNIMPLEMENTED, "kernel reported driver version not implemented on Windows"); diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index edf217875f..f11022ef1d 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -15,6 +15,9 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h" +#if defined(__APPLE__) +#include <mach-o/dyld.h> +#endif #if defined(PLATFORM_WINDOWS) #include <windows.h> #define PATH_MAX MAX_PATH @@ -176,12 +179,20 @@ bool CUDAExecutor::FindOnDiskForComputeCapability( // would return /usr/bin. static string GetBinaryDir(bool strip_exe) { char exe_path[PATH_MAX] = {0}; +#if defined(__APPLE__) + uint32_t buffer_size = 0U; + _NSGetExecutablePath(nullptr, &buffer_size); + char unresolved_path[buffer_size]; + _NSGetExecutablePath(unresolved_path, &buffer_size); + CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1); +#else #if defined(PLATFORM_WINDOWS) HMODULE hModule = GetModuleHandle(NULL); GetModuleFileName(hModule, exe_path, MAX_PATH); #else CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1)); #endif +#endif // Make sure it's null-terminated: exe_path[sizeof(exe_path) - 1] = 0; @@ -843,7 +854,10 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; } // For anything more complicated/prod-focused than this, you'll likely want to // turn to gsys' topology modeling. static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) { -#if defined(PLATFORM_WINDOWS) +#if defined(__APPLE__) + LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero"; + return 0; +#elif defined(PLATFORM_WINDOWS) // Windows support for NUMA is not currently implemented. Return node 0. return 0; #elif defined(__aarch64__) |