Enable building with CUDA support on Mac OS X (#664)

* Enable building with CUDA support on Mac OS X * Building with CUDA support on OS X requires GNU coreutils due to the OS X native readlink command behaving differently from the GNU version * OS X requires CUDA toolkit 7.5 due to host compiler incompatibility with earlier CUDA toolkits - the toolkit versions (CUDA & cuDNN) are now controlled by variables set in the configure script * Fix symlink creation for cuDNN when found in alternative location. * * fix library name formatting broken during rebase * point to Eigen fork that builds on OS X - pending Eigen PR merge * Do not auto generate platform.bzl - modify it as part of unofficial settings. * Upgrade to latest Eigen and fix issues introduced during rebase. * Fix breaking changes brought in by rebase. * * remove redundant cuda/cudnn version functionality * minimize unnecessary diffs against master branch to ease review/merge * Address code review comments: * ensure configure generated platform.bzl values default to empty * fix cudnn default version * Fix syntax error introduced during rebase. * Address code review comments - conform to style and add comments. * Handle empty library versions. * Default to symlinked versions of CUDA libraries when no version is specified. * Add cudnn.h header search path accidentally lost during rebase.
author: Ville Kallioniemi <ville.kallioniemi@gmail.com> 2016-04-27 21:49:10 -0600
committer: Vijay Vasudevan <vrv@google.com> 2016-04-27 20:49:10 -0700
commit: 59faa82c072c473551b25695431fab613675bf24 (patch)
tree: b29d7f0ddbceb3967d2b7cfd66d045a1a98d6b4e
parent: ae3c8479f88da1cd5636b974f653f27755cb0034 (diff)
19 files changed, 554 insertions, 139 deletions
diff --git a/configure b/configure
index 0a7d697c40..0a43b31468 100755
--- a/configure
+++ b/configure
@@ -78,6 +78,8 @@ done
 
 
 # Find out where the CUDA toolkit is installed
+OSNAME=`uname -s`
+
 while true; do
   # Configure the Cuda SDK version to use.
   if [ -z "$TF_CUDA_VERSION" ]; then
@@ -93,15 +95,24 @@ while true; do
       CUDA_TOOLKIT_PATH=$default_cuda_path
     fi
   fi
+
   if [[ -z "$TF_CUDA_VERSION" ]]; then
     TF_CUDA_EXT=""
   else
     TF_CUDA_EXT=".$TF_CUDA_VERSION"
   fi
-  if [ -e $CUDA_TOOLKIT_PATH/lib64/libcudart.so$TF_CUDA_EXT ]; then
+
+  if [ "$OSNAME" == "Linux" ]; then
+    CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
+  elif [ "$OSNAME" == "Darwin" ]; then
+    CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
+  fi
+
+  if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
     break
   fi
-  echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. $CUDA_TOOLKIT_PATH/lib64/libcudart.so$TF_CUDA_EXT cannot be found"
+  echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
+
   if [ -z "$fromuser" ]; then
     exit 1
   fi
@@ -127,25 +138,41 @@ while true; do
     fi
     # Result returned from "read" will be used unexpanded. That make "~" unuseable.
     # Going through one more level of expansion to handle that.
-    CUDNN_INSTALL_PATH=$(bash -c "readlink -f $CUDNN_INSTALL_PATH")
+    CUDNN_INSTALL_PATH=`${PYTHON_BIN_PATH} -c "import os; print(os.path.realpath(os.path.expanduser('${CUDNN_INSTALL_PATH}')))"`
   fi
+
   if [[ -z "$TF_CUDNN_VERSION" ]]; then
     TF_CUDNN_EXT=""
   else
     TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
   fi
-  if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so${TF_CUDNN_EXT}" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so${TF_CUDNN_EXT}" ]; then
-    break
+
+  if [ "$OSNAME" == "Linux" ]; then
+    CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
+    CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
+  elif [ "$OSNAME" == "Darwin" ]; then
+    CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
+    CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
   fi
-  CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
-  if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
-    CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
+
+  if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
     break
   fi
-  echo "Invalid path to cuDNN ${TF_CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
-  echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so${TF_CUDNN_EXT}"
-  echo "$CUDNN_INSTALL_PATH/libcudnn.so${TF_CUDNN_EXT}"
-  echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}"
+
+  if [ "$OSNAME" == "Linux" ]; then
+    CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
+    if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
+      CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
+      break
+    fi
+  fi
+  echo "Invalid path to cuDNN ${CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
+  echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_PATH}"
+  echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_ALT_PATH}"
+  if [ "$OSNAME" == "Linux" ]; then
+    echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}"
+  fi
+
   if [ -z "$fromuser" ]; then
     exit 1
   fi
@@ -157,18 +184,16 @@ done
 cat > third_party/gpus/cuda/cuda.config <<EOF
 # CUDA_TOOLKIT_PATH refers to the CUDA toolkit.
 CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
-
 # CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
 # files can be either in this directory, or under include/ and lib64/
 # directories separately.
 CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
 
 # The Cuda SDK version that should be used in this build (empty to use libcudart.so symlink)
-TF_CUDA_VERSION=$TF_CUDA_EXT
-
-# The Cudnn version that should be used in this build (empty to use libcudnn.so symlink)
-TF_CUDNN_VERSION=$TF_CUDNN_EXT
+TF_CUDA_VERSION=$TF_CUDA_VERSION
 
+# The Cudnn version that should be used in this build
+TF_CUDNN_VERSION=$TF_CUDNN_VERSION
 EOF
 
 # Configure the gcc host compiler to use
@@ -176,13 +201,17 @@ export WARNING=$DO_NOT_SUBMIT_WARNING
 perl -pi -e "s,CPU_COMPILER = \('.*'\),# \$ENV{WARNING}\nCPU_COMPILER = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
 perl -pi -e "s,GCC_HOST_COMPILER_PATH = \('.*'\),# \$ENV{WARNING}\nGCC_HOST_COMPILER_PATH = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
 
+# Configure the platform name.
+perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/gpus/cuda/platform.bzl
+
 # Configure the Cuda toolkit version to work with.
-perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_EXT\",s" tensorflow/core/platform/default/build_config.bzl
-perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_EXT\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_VERSION\",s" third_party/gpus/cuda/platform.bzl
 
 # Configure the Cudnn version to work with.
-perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_EXT\",s" tensorflow/core/platform/default/build_config.bzl
-perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_EXT\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_VERSION\",s" third_party/gpus/cuda/platform.bzl
+
 
 # Configure the compute capabilities that TensorFlow builds for.
 # Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
diff --git a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
index 30d59fe7ba..08a3a5962a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
@@ -257,7 +257,7 @@ CUPTIManager *GetCUPTIManager() {
 // TODO(pbar) Move this to platform specific header file?
 // Static thread local variable for POD types.
 #define TF_STATIC_THREAD_LOCAL_POD(_Type_, _var_)                  \
-  static thread_local _Type_ s_obj_##_var_;                        \
+  static __thread _Type_ s_obj_##_var_;                            \
   namespace {                                                      \
   class ThreadLocal_##_var_ {                                      \
    public:                                                         \
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 633441f31b..6b3d85ded4 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -3,11 +3,6 @@
 load("//google/protobuf:protobuf.bzl", "cc_proto_library")
 load("//google/protobuf:protobuf.bzl", "py_proto_library")
 
-# configure may change the following lines to '.X.Y' or similar
-CUDA_VERSION = ""
-
-CUDNN_VERSION = ""
-
 # Appends a suffix to a list of deps.
 def tf_deps(deps, suffix):
   tf_deps = []
@@ -96,9 +91,3 @@ def tf_additional_test_srcs():
 
 def tf_kernel_tests_linkstatic():
   return 0
-
-def tf_get_cuda_version():
-  return CUDA_VERSION
-
-def tf_get_cudnn_version():
-  return CUDNN_VERSION
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index da86245cc1..994b6a3a3c 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -9,7 +9,7 @@ exports_files(["LICENSE"])
 
 load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_get_cuda_version")
+load("/third_party/gpus/cuda/platform", "cuda_library_path")
 
 cc_library(
     name = "gtest",
@@ -31,7 +31,16 @@ tf_cuda_library(
     name = "stream_executor",
     deps = [
         "//tensorflow/stream_executor",
-    ],
+    ] + select({
+        "//third_party/gpus/cuda:darwin": ["IOKit"],
+        "//conditions:default": []
+    }),
+)
+
+# OSX framework for device driver access
+cc_library(
+  name = "IOKit",
+  linkopts = ["-framework IOKit"],
 )
 
 cc_library(
@@ -69,12 +78,18 @@ filegroup(
 cc_library(
     name = "cuda",
     data = [
-        "//third_party/gpus/cuda:lib64/libcudart.so" + tf_get_cuda_version(),
-    ],
-    linkopts = [
-        "-Wl,-rpath,third_party/gpus/cuda/lib64",
-        "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64",
+        "//third_party/gpus/cuda:{}".format(cuda_library_path("cudart")),
     ],
+    linkopts = select({
+        "//third_party/gpus/cuda:darwin": [
+            "-Wl,-rpath,third_party/gpus/cuda/lib",
+            "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib"
+        ],
+        "//conditions:default": [
+            "-Wl,-rpath,third_party/gpus/cuda/lib64",
+            "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64"
+        ]
+    }),
     deps = [
         "//third_party/gpus/cuda:cudart",
     ],
diff --git a/tensorflow/core/platform/load_library.cc b/tensorflow/core/platform/load_library.cc
index b8a93906f1..24fdcfd1fc 100644
--- a/tensorflow/core/platform/load_library.cc
+++ b/tensorflow/core/platform/load_library.cc
@@ -49,6 +49,24 @@ Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
   return Status::OK();
 }
 
+string FormatLibraryFileName(const string& name, const string& version) {
+  string filename;
+#if defined(__APPLE__)
+  if (version.size() == 0) {
+    filename = "lib" + name + ".dylib";
+  } else {
+    filename = "lib" + name + "." + version + ".dylib";
+  }
+#else
+  if (version.size() == 0) {
+    filename = "lib" + name + ".so";
+  } else {
+    filename = "lib" + name + ".so" + "." + version;
+  }
+#endif
+  return filename;
+}
+
 }  // namespace internal
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/load_library.h b/tensorflow/core/platform/load_library.h
index b67e8835c5..96c3cab156 100644
--- a/tensorflow/core/platform/load_library.h
+++ b/tensorflow/core/platform/load_library.h
@@ -25,6 +25,9 @@ namespace internal {
 Status LoadLibrary(const char* library_filename, void** handle);
 Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                             void** symbol);
+// Return the filename of a dynamically linked library formatted according to
+// platform naming conventions
+string FormatLibraryFileName(const string& name, const string& version);
 
 }  // namespace internal
 
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 87dd42063c..f24bedc61d 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -17,13 +17,18 @@ limitations under the License.
 
 #include <dirent.h>
 #include <limits.h>
-#include <link.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef __APPLE__
+#include <IOKit/kext/KextManager.h>
+#include <mach-o/dyld.h>
+#else
+#include <link.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
+#endif
 #include <unistd.h>
 #include <algorithm>
 #include <memory>
@@ -45,10 +50,15 @@ namespace perftools {
 namespace gputools {
 namespace cuda {
 
+#ifdef __APPLE__
+static const CFStringRef kDriverKextIdentifier = CFSTR("com.nvidia.CUDA");
+#else
 static const char *kDriverVersionPath = "/proc/driver/nvidia/version";
+#endif
+
 
 string DriverVersionToString(DriverVersion version) {
-  return port::Printf("%d.%d", std::get<0>(version), std::get<1>(version));
+  return port::Printf("%d.%d.%d", std::get<0>(version), std::get<1>(version), std::get<2>(version));
 }
 
 string DriverVersionStatusToString(port::StatusOr<DriverVersion> version) {
@@ -61,15 +71,16 @@ string DriverVersionStatusToString(port::StatusOr<DriverVersion> version) {
 
 port::StatusOr<DriverVersion> StringToDriverVersion(const string &value) {
   std::vector<string> pieces = port::Split(value, '.');
-  if (pieces.size() != 2) {
+  if (pieces.size() != 2 && pieces.size() != 3) {
     return port::Status{
         port::error::INVALID_ARGUMENT,
-        port::Printf("expected %%d.%%d form for driver version; got \"%s\"",
+        port::Printf("expected %%d.%%d or %%d.%%d.%%d form for driver version; got \"%s\"",
                      value.c_str())};
   }
 
   int major;
   int minor;
+  int patch = 0;
   if (!port::safe_strto32(pieces[0], &major)) {
     return port::Status{
         port::error::INVALID_ARGUMENT,
@@ -84,8 +95,15 @@ port::StatusOr<DriverVersion> StringToDriverVersion(const string &value) {
                      "integer from string \"%s\"",
                      pieces[1].c_str(), value.c_str())};
   }
+  if (pieces.size() == 3 && !port::safe_strto32(pieces[2], &patch)) {
+    return port::Status{
+      port::error::INVALID_ARGUMENT,
+      port::Printf("could not parse patch version number \"%s\" as an "
+                     "integer from string \"%s\"",
+                   pieces[2].c_str(), value.c_str())};
+  }
 
-  DriverVersion result{major, minor};
+  DriverVersion result{major, minor, patch};
   VLOG(2) << "version string \"" << value << "\" made value "
           << DriverVersionToString(result);
   return result;
@@ -98,6 +116,26 @@ string Diagnostician::GetDevNodePath(int dev_node_ordinal) {
 }
 
 void Diagnostician::LogDiagnosticInformation() {
+#ifdef __APPLE__
+  CFStringRef kext_ids[1];
+  kext_ids[0] = kDriverKextIdentifier;
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void**)kext_ids, 1, &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos = KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) {
+    bool started = CFBooleanGetValue((CFBooleanRef)CFDictionaryGetValue(cuda_driver_info, CFSTR("OSBundleStarted")));
+    if (!started) {
+      LOG(INFO) << "kernel driver is installed, but does not appear to be running on this host "
+                << "(" << port::Hostname() << ")";
+    }
+  } else {
+    LOG(INFO) << "kernel driver does not appear to be installed on this host "
+              << "(" << port::Hostname() << ")";
+  }
+  CFRelease(kext_infos);
+#else
   if (access(kDriverVersionPath, F_OK) != 0) {
     LOG(INFO) << "kernel driver does not appear to be running on this host "
               << "(" << port::Hostname() << "): "
@@ -110,6 +148,7 @@ void Diagnostician::LogDiagnosticInformation() {
               << " does not exist";
     return;
   }
+#endif
 
   LOG(INFO) << "retrieving CUDA diagnostic information for host: "
             << port::Hostname();
@@ -149,9 +188,13 @@ void Diagnostician::LogDiagnosticInformation() {
   port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion();
   LOG(INFO) << "kernel reported version is: "
             << DriverVersionStatusToString(kernel_version);
+
+  // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__)
   if (kernel_version.ok() && dso_version.ok()) {
     WarnOnDsoKernelMismatch(dso_version, kernel_version);
   }
+#endif
 }
 
 // Iterates through loaded DSOs with DlIteratePhdrCallback to find the
@@ -161,6 +204,29 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
       port::error::NOT_FOUND,
       "was unable to find libcuda.so DSO loaded into this program"}};
 
+#if defined(__APPLE__)
+    // OSX CUDA libraries have names like: libcuda_310.41.15_mercury.dylib
+    const string prefix("libcuda_");
+    const string suffix("_mercury.dylib");
+    for (uint32_t image_index = 0; image_index < _dyld_image_count(); ++image_index) {
+      const string path(_dyld_get_image_name(image_index));
+      const size_t suffix_pos = path.rfind(suffix);
+      const size_t prefix_pos = path.rfind(prefix, suffix_pos);
+      if (prefix_pos == string::npos ||
+          suffix_pos == string::npos) {
+        // no match
+        continue;
+      }
+      const size_t start = prefix_pos + prefix.size();
+      if (start >= suffix_pos) {
+        // version not included
+        continue;
+      }
+      const size_t length = suffix_pos - start;
+      const string version = path.substr(start, length);
+      result = StringToDriverVersion(version);
+    }
+#else
   // Callback used when iterating through DSOs. Looks for the driver-interfacing
   // DSO and yields its version number into the callback data, when found.
   auto iterate_phdr =
@@ -192,6 +258,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
   };
 
   dl_iterate_phdr(iterate_phdr, &result);
+#endif
 
   return result;
 }
@@ -236,6 +303,29 @@ void Diagnostician::WarnOnDsoKernelMismatch(
 
 
 port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
+#if defined(__APPLE__)
+  CFStringRef kext_ids[1];
+  kext_ids[0] = kDriverKextIdentifier;
+  CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void**)kext_ids, 1, &kCFTypeArrayCallBacks);
+  CFDictionaryRef kext_infos = KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+  CFRelease(kext_id_query);
+
+  CFDictionaryRef cuda_driver_info = nullptr;
+  if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) {
+    // NOTE: OSX CUDA driver does not currently store the same driver version
+    // in kCFBundleVersionKey as is returned by cuDriverGetVersion
+    const char * version = CFStringGetCStringPtr((CFStringRef)CFDictionaryGetValue(cuda_driver_info, kCFBundleVersionKey), kCFStringEncodingUTF8);
+    CFRelease(kext_infos);
+    return StringToDriverVersion(version);
+  }
+  CFRelease(kext_infos);
+  auto status =
+    port::Status{port::error::INTERNAL,
+                 port::StrCat("failed to read driver bundle version: ",
+                              CFStringGetCStringPtr(kDriverKextIdentifier, kCFStringEncodingUTF8))
+    };
+  return status;
+#else
   FILE *driver_version_file = fopen(kDriverVersionPath, "r");
   if (driver_version_file == nullptr) {
     return port::Status{
@@ -267,6 +357,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
                                 ferror(driver_version_file))};
   fclose(driver_version_file);
   return status;
+#endif
 }
 
 
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.h b/tensorflow/stream_executor/cuda/cuda_diagnostics.h
index 42336c337f..e98d32f286 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.h
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.h
@@ -26,8 +26,8 @@ namespace perftools {
 namespace gputools {
 namespace cuda {
 
-// e.g. DriverVersion{331, 79}
-using DriverVersion = std::tuple<int, int>;
+// e.g. DriverVersion{346, 3, 4}
+using DriverVersion = std::tuple<int, int, int>;
 
 // Converts a parsed driver version to string form.
 string DriverVersionToString(DriverVersion version);
@@ -72,8 +72,6 @@ class Diagnostician {
   static void LogDriverVersionInformation();
 
  private:
-  // Logs information about the loaded nvidia-related kernel modules.
-  static void LogKernelModuleInformation();
 
   // Given the DSO version number and the driver version file contents, extracts
   // the driver version and compares, warning the user in the case of
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index f21065690c..7034822356 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -302,10 +302,13 @@ port::Status CudnnSupport::Init() {
     } else {
       const auto& version = result.ValueOrDie();
       LOG(INFO) << "running driver version: " << DriverVersionToString(version);
+      // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__)
       if (std::get<0>(version) < 340) {
         LOG(ERROR)
             << "cudnn library is only supported on 340.XX+ driver versions";
       }
+#endif
     }
   }
 
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index f98eec3b33..b343af0fce 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -15,6 +15,9 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
 
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
 #include <unistd.h>
 
 #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
@@ -194,7 +197,15 @@ bool CUDAExecutor::FindOnDiskForComputeCapability(
 //                 would return /usr/bin.
 static string GetBinaryDir(bool strip_exe) {
   char exe_path[PATH_MAX] = {0};
-  CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#if defined(__APPLE__)
+    uint32_t buffer_size = 0U;
+    _NSGetExecutablePath(nullptr, &buffer_size);
+    char unresolved_path[buffer_size];
+    _NSGetExecutablePath(unresolved_path, &buffer_size);
+    CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
+#else
+    CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#endif
   // Make sure it's null-terminated:
   exe_path[sizeof(exe_path) - 1] = 0;
 
@@ -868,6 +879,10 @@ CUcontext CUDAExecutor::cuda_context() { return context_; }
 // For anything more complicated/prod-focused than this, you'll likely want to
 // turn to gsys' topology modeling.
 static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
+#if defined(__APPLE__)
+  LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
+  return 0;
+#else
   VLOG(2) << "trying to read NUMA node for device ordinal: " << device_ordinal;
   static const int kUnknownNumaNode = -1;
 
@@ -910,6 +925,7 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
       << content;
 
   return kUnknownNumaNode;
+#endif
 }
 
 // Set of compute capability specific device parameters that cannot be
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index caf10a9003..bf7faef209 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -17,12 +17,15 @@ limitations under the License.
 
 #include <dlfcn.h>
 #include <limits.h>
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
 #include <stdlib.h>
 #include <unistd.h>
 #include <initializer_list>
-#include "tensorflow/stream_executor/platform/port.h"
 #include <vector>
 
+#include "tensorflow/core/platform/load_library.h"
 #include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/strcat.h"
@@ -41,8 +44,8 @@ string GetCudaVersion() { return ""; }
 string GetCudnnVersion() { return ""; }
 
 /* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath("libcublas.so" + GetCudaVersion(),
-                                  "third_party/gpus/cuda/lib64"),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cublas", GetCudaVersion()),
+                                  GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
@@ -51,33 +54,33 @@ string GetCudnnVersion() { return ""; }
   // different version number than other CUDA libraries.  See b/22397368 for
   // some details about the complications surrounding this.
   return GetDsoHandle(
-      FindDsoPath("libcudnn.so" + GetCudnnVersion(),
-                  "third_party/gpus/cuda/lib64"),
-      dso_handle);
+      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cudnn", GetCudnnVersion()),
+                              GetCudaLibraryDirPath()),
+                      dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath("libcufft.so" + GetCudaVersion(),
-                                  "third_party/gpus/cuda/lib64"),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cufft", GetCudaVersion()),
+                                  GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath("libcurand.so" + GetCudaVersion(),
-                                  "third_party/gpus/cuda/lib64"),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("curand", GetCudaVersion()),
+                                  GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
-  return GetDsoHandle(
-      FindDsoPath("libcuda.so.1", "third_party/gpus/cuda/driver/lib64"),
-      dso_handle);
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", ""),
+                                  GetCudaDriverLibraryPath()),
+                      dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
   return GetDsoHandle(
-      FindDsoPath("libcupti.so" + GetCudaVersion(),
-                  "third_party/gpus/cuda/extras/CUPTI/lib64"),
+      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cupti", GetCudaVersion()),
+                  GetCudaCuptiLibraryPath()),
       dso_handle);
 }
 
@@ -109,7 +112,15 @@ string GetCudnnVersion() { return ""; }
 
 /* static */ string DsoLoader::GetBinaryDirectory(bool strip_executable_name) {
   char exe_path[PATH_MAX] = {0};
+#ifdef __APPLE__
+  uint32_t buffer_size(0U);
+  _NSGetExecutablePath(nullptr, &buffer_size);
+  char unresolved_path[buffer_size];
+  _NSGetExecutablePath(unresolved_path, &buffer_size);
+  CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
+#else
   CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#endif
   // Make sure it's null-terminated:
   exe_path[sizeof(exe_path) - 1] = 0;
 
@@ -126,8 +137,11 @@ string GetCudnnVersion() { return ""; }
 // Ownership is transferred to the caller.
 static std::vector<string>* CreatePrimordialRpaths() {
   auto rpaths = new std::vector<string>;
-  rpaths->push_back(
-      "driver/driver_sh.runfiles/third_party/gpus/cuda/lib64");
+#if defined(__APPLE__)
+  rpaths->push_back("driver/driver_sh.runfiles/third_party/gpus/cuda/lib");
+#else
+  rpaths->push_back("driver/driver_sh.runfiles/third_party/gpus/cuda/lib64");
+#endif
   return rpaths;
 }
 
@@ -175,6 +189,31 @@ static std::vector<string>* CreatePrimordialRpaths() {
   return library_name.ToString();
 }
 
+/* static */ string DsoLoader::GetCudaLibraryDirPath() {
+#if defined(__APPLE__)
+  return "third_party/gpus/cuda/lib";
+#else
+  return "third_party/gpus/cuda/lib64";
+#endif
+}
+
+/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
+#if defined(__APPLE__)
+  return "third_party/gpus/cuda/driver/lib";
+#else
+  return "third_party/gpus/cuda/driver/lib64";
+#endif
+}
+
+/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
+#if defined(__APPLE__)
+  return "third_party/gpus/cuda/extras/CUPTI/lib";
+#else
+  return "third_party/gpus/cuda/extras/CUPTI/lib64";
+#endif
+}
+
+
 // -- CachedDsoLoader
 
 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h
index ba1690c320..2afbc294df 100644
--- a/tensorflow/stream_executor/dso_loader.h
+++ b/tensorflow/stream_executor/dso_loader.h
@@ -91,6 +91,11 @@ class DsoLoader {
   static string FindDsoPath(port::StringPiece library_name,
                             port::StringPiece runfiles_relpath);
 
+  // Return platform dependent paths for DSOs
+  static string GetCudaLibraryDirPath();
+  static string GetCudaDriverLibraryPath();
+  static string GetCudaCuptiLibraryPath();
+
   SE_DISALLOW_COPY_AND_ASSIGN(DsoLoader);
 };
 
diff --git a/tensorflow/stream_executor/lib/static_threadlocal.h b/tensorflow/stream_executor/lib/static_threadlocal.h
index 7098da3453..25d97ae000 100644
--- a/tensorflow/stream_executor/lib/static_threadlocal.h
+++ b/tensorflow/stream_executor/lib/static_threadlocal.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 // For POD types in TLS mode, s_obj_VAR is the thread-local variable.
 #define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_)               \
-  static thread_local _Type_ s_obj_##_var_;                     \
+  static __thread _Type_ s_obj_##_var_;                         \
   namespace {                                                   \
   class ThreadLocal_##_var_ {                                   \
   public:                                                       \
diff --git a/third_party/gpus/crosstool/BUILD b/third_party/gpus/crosstool/BUILD
index eac4dc7fad..7c9c8ab884 100644
--- a/third_party/gpus/crosstool/BUILD
+++ b/third_party/gpus/crosstool/BUILD
@@ -22,6 +22,20 @@ cc_toolchain(
     supports_param_files = 0,
 )
 
+cc_toolchain(
+    name = "cc-compiler-darwin",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "darwin",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 0,
+)
+
 filegroup(
     name = "empty",
     srcs = [],
diff --git a/third_party/gpus/crosstool/CROSSTOOL b/third_party/gpus/crosstool/CROSSTOOL
index a9f26f5710..8db81a9603 100644
--- a/third_party/gpus/crosstool/CROSSTOOL
+++ b/third_party/gpus/crosstool/CROSSTOOL
@@ -150,3 +150,95 @@ toolchain {
   }
   linking_mode_flags { mode: DYNAMIC }
 }
+
+toolchain {
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "local"
+  needsPic: true
+  target_libc: "macosx"
+  target_cpu: "darwin"
+  target_system_name: "local"
+  toolchain_identifier: "local_darwin"
+
+  tool_path { name: "ar" path: "/usr/bin/libtool" }
+  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+  tool_path { name: "cpp" path: "/usr/bin/cpp" }
+  tool_path { name: "dwp" path: "/usr/bin/dwp" }
+  tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
+  cxx_flag: "-std=c++11"
+  ar_flag: "-static"
+  ar_flag: "-s"
+  ar_flag: "-o"
+  linker_flag: "-lc++"
+  linker_flag: "-undefined"
+  linker_flag: "dynamic_lookup"
+  # TODO(ulfjack): This is wrong on so many levels. Figure out a way to auto-detect the proper
+  # setting from the local compiler, and also how to make incremental builds correct.
+  cxx_builtin_include_directory: "/"
+  tool_path { name: "gcov" path: "/usr/bin/gcov" }
+  tool_path { name: "ld" path: "/usr/bin/ld" }
+  tool_path { name: "nm" path: "/usr/bin/nm" }
+  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+  objcopy_embed_flag: "-I"
+  objcopy_embed_flag: "binary"
+  tool_path { name: "objdump" path: "/usr/bin/objdump" }
+  tool_path { name: "strip" path: "/usr/bin/strip" }
+
+  # Anticipated future default.
+  unfiltered_cxx_flag: "-no-canonical-prefixes"
+  # Make C++ compilation deterministic. Use linkstamping instead of these
+  # compiler symbols.
+  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+
+  # Security hardening on by default.
+  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+  compiler_flag: "-D_FORTIFY_SOURCE=1"
+  compiler_flag: "-fstack-protector"
+
+  # Enable coloring even if there's no attached terminal. Bazel removes the
+  # escape sequences if --nocolor is specified.
+  compiler_flag: "-fcolor-diagnostics"
+
+  # All warnings are enabled. Maybe enable -Werror as well?
+  compiler_flag: "-Wall"
+  # Enable a few more warnings that aren't part of -Wall.
+  compiler_flag: "-Wthread-safety"
+  compiler_flag: "-Wself-assign"
+
+  # Keep stack frames for debugging, even in opt mode.
+  compiler_flag: "-fno-omit-frame-pointer"
+
+  # Anticipated future default.
+  linker_flag: "-no-canonical-prefixes"
+
+  compilation_mode_flags {
+    mode: DBG
+    # Enable debug symbols.
+    compiler_flag: "-g"
+  }
+  compilation_mode_flags {
+    mode: OPT
+    # No debug symbols.
+    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or even generally?
+    # However, that can't happen here, as it requires special handling in Bazel.
+    compiler_flag: "-g0"
+
+    # Conservative choice for -O
+    # -O3 can increase binary size and even slow down the resulting binaries.
+    # Profile first and / or use FDO if you need better performance than this.
+    compiler_flag: "-O2"
+
+    # Disable assertions
+    compiler_flag: "-DNDEBUG"
+
+    # Removal of unused code and data at link time (can this increase binary size in some cases?).
+    compiler_flag: "-ffunction-sections"
+    compiler_flag: "-fdata-sections"
+  }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
index 04ab50ca86..5f175efcf3 100755
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python2.7
 # Copyright 2015 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/third_party/gpus/cuda/BUILD b/third_party/gpus/cuda/BUILD
index 28b49b8cc7..792dbb4268 100644
--- a/third_party/gpus/cuda/BUILD
+++ b/third_party/gpus/cuda/BUILD
@@ -1,10 +1,11 @@
 licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
 
 load("//tensorflow:tensorflow.bzl", "if_cuda")
-load("//tensorflow/core:platform/default/build_config.bzl",
-     "tf_get_cuda_version",
-     "tf_get_cudnn_version",
-    )
+load("platform", "cuda_library_path")
+load("platform", "cuda_static_library_path")
+load("platform", "cudnn_library_path")
+load("platform", "cupti_library_path")
+load("platform", "readlink_command")
 
 package(default_visibility = ["//visibility:public"])
 
@@ -31,6 +32,12 @@ config_setting(
     },
 )
 
+config_setting(
+    name = "darwin",
+    values = {"cpu": "darwin"},
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "cuda_headers",
     hdrs = glob([
@@ -43,24 +50,26 @@ cc_library(
 cc_library(
     name = "cudart_static",
     srcs = [
-        "lib64/libcudart_static.a",
+        cuda_static_library_path("cudart"),
     ],
     includes = ["include/"],
     linkopts = [
         "-ldl",
-        "-lrt",
         "-lpthread",
-    ],
+    ] + select({
+        "//tensorflow:darwin": [],
+        "//conditions:default": ["-lrt"]
+    }),
     visibility = ["//visibility:public"],
 )
 
 cc_library(
     name = "cudart",
     srcs = [
-        "lib64/libcudart.so" + tf_get_cuda_version(),
+        cuda_library_path("cudart")
     ],
     data = [
-        "lib64/libcudart.so" + tf_get_cuda_version(),
+        cuda_library_path("cudart")
     ],
     includes = ["include/"],
     visibility = ["//visibility:public"],
@@ -70,10 +79,10 @@ cc_library(
 cc_library(
     name = "cublas",
     srcs = [
-        "lib64/libcublas.so" + tf_get_cuda_version(),
+        cuda_library_path("cublas")
     ],
     data = [
-        "lib64/libcublas.so" + tf_get_cuda_version(),
+        cuda_library_path("cublas")
     ],
     includes = ["include/"],
     visibility = ["//visibility:public"],
@@ -83,10 +92,10 @@ cc_library(
 cc_library(
     name = "cudnn",
     srcs = [
-        "lib64/libcudnn.so" + tf_get_cudnn_version(),
+        cudnn_library_path()
     ],
     data = [
-        "lib64/libcudnn.so" + tf_get_cudnn_version(),
+        cudnn_library_path()
     ],
     includes = ["include/"],
     visibility = ["//visibility:public"],
@@ -96,10 +105,10 @@ cc_library(
 cc_library(
     name = "cufft",
     srcs = [
-        "lib64/libcufft.so" + tf_get_cuda_version(),
+        cuda_library_path("cufft")
     ],
     data = [
-        "lib64/libcufft.so" + tf_get_cuda_version(),
+        cuda_library_path("cufft")
     ],
     includes = ["include/"],
     visibility = ["//visibility:public"],
@@ -130,7 +139,7 @@ cc_library(
 cc_library(
     name = "cupti_dsos",
     data = [
-        "extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
+        cupti_library_path(),
     ],
     visibility = ["//visibility:public"],
 )
@@ -152,34 +161,34 @@ genrule(
         "include/cublas.h",
         "include/cudnn.h",
         "extras/CUPTI/include/cupti.h",
-        "lib64/libcudart_static.a",
-        "lib64/libcublas.so" + tf_get_cuda_version(),
-        "lib64/libcudnn.so" + tf_get_cudnn_version(),
-        "lib64/libcudart.so" + tf_get_cuda_version(),
-        "lib64/libcufft.so" + tf_get_cuda_version(),
-        "extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
+        cuda_static_library_path("cudart"),
+        cuda_library_path("cublas"),
+        cudnn_library_path(),
+        cuda_library_path("cudart"),
+        cuda_library_path("cufft"),
+        cupti_library_path(),
     ],
     cmd = if_cuda(
         # Under cuda config, create all the symbolic links to the actual cuda files
-        "OUTPUTDIR=`readlink -f $(@D)/../../..`; cd `dirname $(location :cuda_config.sh)`; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;",
+        "OUTPUTDIR=`{} -f $(@D)/../../..`; cd third_party/gpus/cuda; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;".format(readlink_command()),
 
         # Under non-cuda config, create all dummy files to make the build go through
         ";".join([
-          "mkdir -p $(@D)/include",
-          "mkdir -p $(@D)/lib64",
-          "mkdir -p $(@D)/extras/CUPTI/include",
-          "mkdir -p $(@D)/extras/CUPTI/lib64",
-          "touch $(@D)/include/cuda.h",
-          "touch $(@D)/include/cublas.h",
-          "touch $(@D)/include/cudnn.h",
-          "touch $(@D)/extras/CUPTI/include/cupti.h",
-          "touch $(@D)/lib64/libcudart_static.a",
-          "touch $(@D)/lib64/libcublas.so" + tf_get_cuda_version(),
-          "touch $(@D)/lib64/libcudnn.so" + tf_get_cudnn_version(),
-          "touch $(@D)/lib64/libcudart.so" + tf_get_cuda_version(),
-          "touch $(@D)/lib64/libcufft.so" + tf_get_cuda_version(),
-          "touch $(@D)/extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
-            ]),
+           "mkdir -p $(@D)/include",
+           "mkdir -p $(@D)/lib64",
+           "mkdir -p $(@D)/extras/CUPTI/include",
+           "mkdir -p $(@D)/extras/CUPTI/lib64",
+           "touch $(@D)/include/cuda.h",
+           "touch $(@D)/include/cublas.h",
+           "touch $(@D)/include/cudnn.h",
+           "touch $(@D)/extras/CUPTI/include/cupti.h",
+           "touch $(@D)/{}".format(cuda_static_library_path("cudart")),
+           "touch $(@D)/{}".format(cuda_library_path("cublas")),
+           "touch $(@D)/{}".format(cudnn_library_path()),
+           "touch $(@D)/{}".format(cuda_library_path("cudart")),
+           "touch $(@D)/{}".format(cuda_library_path("cufft")),
+           "touch $(@D)/{}".format(cupti_library_path()),
+         ]),
     ),
     local = 1,
 )
@@ -191,7 +200,7 @@ genrule(
     ],
     cmd = if_cuda(
         # Under cuda config, create the symbolic link to the actual cuda.config
-        "configfile=$(location :cuda.config); ln -sf `readlink -f $${configfile#*/*/*/}` $(@D)/;",
+        "configfile=$(location :cuda.config); ln -sf `{} -f $${{configfile#*/*/*/}}` $(@D)/;".format(readlink_command()),
 
         # Under non-cuda config, create the dummy file
         ";".join([
diff --git a/third_party/gpus/cuda/cuda_config.sh b/third_party/gpus/cuda/cuda_config.sh
index e93a7ed741..0e1106bb70 100755
--- a/third_party/gpus/cuda/cuda_config.sh
+++ b/third_party/gpus/cuda/cuda_config.sh
@@ -54,7 +54,19 @@ source cuda.config || exit -1
 
 OUTPUTDIR=${OUTPUTDIR:-../../..}
 CUDA_TOOLKIT_PATH=${CUDA_TOOLKIT_PATH:-/usr/local/cuda}
-CUDNN_INSTALL_PATH=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
+CUDNN_INSTALL_BASEDIR=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
+
+if [[ -z "$TF_CUDA_VERSION" ]]; then
+  TF_CUDA_EXT=""
+else
+  TF_CUDA_EXT=".$TF_CUDA_VERSION"
+fi
+
+if [[ -z "$TF_CUDNN_VERSION" ]]; then
+  TF_CUDNN_EXT=""
+else
+  TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
+fi
 
 # An error message when the Cuda toolkit is not found
 function CudaError {
@@ -99,59 +111,84 @@ function CheckAndLinkToSrcTree {
 
   # Link the output file to the source tree, avoiding self links if they are
   # the same. This could happen if invoked from the source tree by accident.
-  if [ ! $(readlink -f $PWD) == $(readlink -f $OUTPUTDIR/third_party/gpus/cuda) ]; then
+  if [ ! $($READLINK_CMD -f $PWD) == $($READLINK_CMD -f $OUTPUTDIR/third_party/gpus/cuda) ]; then
     mkdir -p $(dirname $OUTPUTDIR/third_party/gpus/cuda/$FILE)
     ln -sf $PWD/$FILE $OUTPUTDIR/third_party/gpus/cuda/$FILE
   fi
 }
 
+OSNAME=`uname -s`
+if [ "$OSNAME" == "Linux" ]; then
+  CUDA_LIB_PATH="lib64"
+  CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib64"
+  CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
+  CUDA_RT_LIB_STATIC_PATH="lib64/libcudart_static.a"
+  CUDA_BLAS_LIB_PATH="lib64/libcublas.so${TF_CUDA_EXT}"
+  CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
+  CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
+  CUDA_FFT_LIB_PATH="lib64/libcufft.so${TF_CUDA_EXT}"
+  CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib64/libcupti.so${TF_CUDA_EXT}"
+  READLINK_CMD="readlink"
+elif [ "$OSNAME" == "Darwin" ]; then
+  CUDA_LIB_PATH="lib"
+  CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib"
+  CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
+  CUDA_RT_LIB_STATIC_PATH="lib/libcudart_static.a"
+  CUDA_BLAS_LIB_PATH="lib/libcublas${TF_CUDA_EXT}.dylib"
+  CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
+  CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
+  CUDA_FFT_LIB_PATH="lib/libcufft${TF_CUDA_EXT}.dylib"
+  CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib/libcupti${TF_CUDA_EXT}.dylib"
+  READLINK_CMD="greadlink"
+fi
+
 if [ "$CHECK_ONLY" == "1" ]; then
   CheckAndLinkToSrcTree CudaError include/cuda.h
   CheckAndLinkToSrcTree CudaError include/cublas.h
   CheckAndLinkToSrcTree CudnnError include/cudnn.h
   CheckAndLinkToSrcTree CudaError extras/CUPTI/include/cupti.h
-  CheckAndLinkToSrcTree CudaError lib64/libcudart_static.a
-  CheckAndLinkToSrcTree CudaError lib64/libcublas.so$TF_CUDA_VERSION
-  CheckAndLinkToSrcTree CudnnError lib64/libcudnn.so$TF_CUDNN_VERSION
-  CheckAndLinkToSrcTree CudaError lib64/libcudart.so$TF_CUDA_VERSION
-  CheckAndLinkToSrcTree CudaError lib64/libcufft.so$TF_CUDA_VERSION
-  CheckAndLinkToSrcTree CudaError extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION
+  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_STATIC_PATH
+  CheckAndLinkToSrcTree CudaError $CUDA_BLAS_LIB_PATH
+  CheckAndLinkToSrcTree CudnnError $CUDA_DNN_LIB_PATH
+  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_PATH
+  CheckAndLinkToSrcTree CudaError $CUDA_FFT_LIB_PATH
+  CheckAndLinkToSrcTree CudaError $CUDA_CUPTI_LIB_PATH
   exit 0
 fi
 
 # Actually configure the source tree for TensorFlow's canonical view of Cuda
 # libraries.
 
-if test ! -e ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so$TF_CUDA_VERSION; then
-  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so$TF_CUDA_VERSION"
+if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}; then
+  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}"
 fi
 
-if test ! -e ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION; then
-  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION"
+if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}; then
+  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}"
 fi
 
-if test ! -d ${CUDNN_INSTALL_PATH}; then
-  CudnnError "cannot find dir: ${CUDNN_INSTALL_PATH}"
+if test ! -d ${CUDNN_INSTALL_BASEDIR}; then
+  CudnnError "cannot find dir: ${CUDNN_INSTALL_BASEDIR}"
 fi
 
 # Locate cudnn.h
-if test -e ${CUDNN_INSTALL_PATH}/cudnn.h; then
-  CUDNN_HEADER_PATH=${CUDNN_INSTALL_PATH}
-elif test -e ${CUDNN_INSTALL_PATH}/include/cudnn.h; then
-  CUDNN_HEADER_PATH=${CUDNN_INSTALL_PATH}/include
+if test -e ${CUDNN_INSTALL_BASEDIR}/cudnn.h; then
+  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}
+elif test -e ${CUDNN_INSTALL_BASEDIR}/include/cudnn.h; then
+  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}/include
 elif test -e /usr/include/cudnn.h; then
-  CUDNN_HEADER_PATH=/usr/include
+  CUDNN_HEADER_DIR=/usr/include
 else
-  CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_PATH} or /usr/include"
+  CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_BASEDIR}"
 fi
 
-# Locate libcudnn.so.${$TF_CUDNN_VERSION}
-if test -e ${CUDNN_INSTALL_PATH}/libcudnn.so$TF_CUDNN_VERSION; then
-  CUDNN_LIB_PATH=${CUDNN_INSTALL_PATH}
-elif test -e ${CUDNN_INSTALL_PATH}/lib64/libcudnn.so$TF_CUDNN_VERSION; then
-  CUDNN_LIB_PATH=${CUDNN_INSTALL_PATH}/lib64
+# Locate libcudnn
+if test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}; then
+  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}
+elif test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}; then
+  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}
 else
-  CudnnError "cannot find libcudnn.so.$TF_CUDNN_VERSION under: ${CUDNN_INSTALL_PATH}"
+  CudnnError "cannot find ${CUDA_DNN_LIB_PATH} or ${CUDA_DNN_LIB_ALT_PATH} under: ${CUDNN_INSTALL_BASEDIR}"
 fi
 
 # Helper function to build symbolic links for all files under a directory.
@@ -181,8 +218,8 @@ function LinkAllFiles {
 mkdir -p $OUTPUTDIR/third_party/gpus/cuda
 echo "Setting up Cuda include"
 LinkAllFiles ${CUDA_TOOLKIT_PATH}/include $OUTPUTDIR/third_party/gpus/cuda/include || exit -1
-echo "Setting up Cuda lib64"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/lib64 $OUTPUTDIR/third_party/gpus/cuda/lib64 || exit -1
+echo "Setting up Cuda ${CUDA_LIB_PATH}"
+LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_LIB_PATH} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_LIB_PATH} || exit -1
 echo "Setting up Cuda bin"
 LinkAllFiles ${CUDA_TOOLKIT_PATH}/bin $OUTPUTDIR/third_party/gpus/cuda/bin || exit -1
 echo "Setting up Cuda nvvm"
@@ -190,8 +227,8 @@ LinkAllFiles ${CUDA_TOOLKIT_PATH}/nvvm $OUTPUTDIR/third_party/gpus/cuda/nvvm ||
 echo "Setting up CUPTI include"
 LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/include $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/include || exit -1
 echo "Setting up CUPTI lib64"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64 $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/lib64 || exit -1
+LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_DIR} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_CUPTI_LIB_DIR} || exit -1
 
 # Set up symbolic link for cudnn
-ln -sf $CUDNN_HEADER_PATH/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
-ln -sf $CUDNN_LIB_PATH/libcudnn.so$TF_CUDNN_VERSION $OUTPUTDIR/third_party/gpus/cuda/lib64/libcudnn.so$TF_CUDNN_VERSION || exit -1
+ln -sf $CUDNN_HEADER_DIR/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
+ln -sf $CUDNN_LIB_INSTALL_PATH $OUTPUTDIR/third_party/gpus/cuda/$CUDA_DNN_LIB_PATH || exit -1
diff --git a/third_party/gpus/cuda/platform.bzl b/third_party/gpus/cuda/platform.bzl
new file mode 100644
index 0000000000..06f3d0cff4
--- /dev/null
+++ b/third_party/gpus/cuda/platform.bzl
@@ -0,0 +1,57 @@
+CUDA_VERSION = ""
+CUDNN_VERSION = ""
+PLATFORM = ""
+
+def cuda_sdk_version():
+  return CUDA_VERSION
+
+def cudnn_sdk_version():
+  return CUDNN_VERSION
+
+def cuda_library_path(name, version = cuda_sdk_version()):
+  if PLATFORM == "Darwin":
+    if not version:
+      return "lib/lib{}.dylib".format(name)
+    else:
+      return "lib/lib{}.{}.dylib".format(name, version)
+  else:
+    if not version:
+      return "lib64/lib{}.so".format(name)
+    else:
+      return "lib64/lib{}.so.{}".format(name, version)
+
+def cuda_static_library_path(name):
+  if PLATFORM == "Darwin":
+    return "lib/lib{}_static.a".format(name)
+  else:
+    return "lib64/lib{}_static.a".format(name)
+
+def cudnn_library_path(version = cudnn_sdk_version()):
+  if PLATFORM == "Darwin":
+    if not version:
+      return "lib/libcudnn.dylib"
+    else:
+      return "lib/libcudnn.{}.dylib".format(version)
+  else:
+    if not version:
+      return "lib64/libcudnn.so"
+    else:
+      return "lib64/libcudnn.so.{}".format(version)
+
+def cupti_library_path(version = cuda_sdk_version()):
+  if PLATFORM == "Darwin":
+    if not version:
+      return "extras/CUPTI/lib/libcupti.dylib"
+    else:
+      return "extras/CUPTI/lib/libcupti.{}.dylib".format(version)
+  else:
+    if not version:
+      return "extras/CUPTI/lib64/libcupti.so"
+    else:
+      return "extras/CUPTI/lib64/libcupti.so.{}".format(version)
+
+def readlink_command():
+  if PLATFORM == "Darwin":
+    return "greadlink"
+  else:
+    return "readlink"
author	Ville Kallioniemi <ville.kallioniemi@gmail.com>	2016-04-27 21:49:10 -0600
committer	Vijay Vasudevan <vrv@google.com>	2016-04-27 20:49:10 -0700
commit	59faa82c072c473551b25695431fab613675bf24 (patch)
tree	b29d7f0ddbceb3967d2b7cfd66d045a1a98d6b4e
parent	ae3c8479f88da1cd5636b974f653f27755cb0034 (diff)