aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Ville Kallioniemi <ville.kallioniemi@gmail.com>2016-04-27 21:49:10 -0600
committerGravatar Vijay Vasudevan <vrv@google.com>2016-04-27 20:49:10 -0700
commit59faa82c072c473551b25695431fab613675bf24 (patch)
treeb29d7f0ddbceb3967d2b7cfd66d045a1a98d6b4e
parentae3c8479f88da1cd5636b974f653f27755cb0034 (diff)
Enable building with CUDA support on Mac OS X (#664)
* Enable building with CUDA support on Mac OS X * Building with CUDA support on OS X requires GNU coreutils due to the OS X native readlink command behaving differently from the GNU version * OS X requires CUDA toolkit 7.5 due to host compiler incompatibility with earlier CUDA toolkits - the toolkit versions (CUDA & cuDNN) are now controlled by variables set in the configure script * Fix symlink creation for cuDNN when found in alternative location. * * fix library name formatting broken during rebase * point to Eigen fork that builds on OS X - pending Eigen PR merge * Do not auto generate platform.bzl - modify it as part of unofficial settings. * Upgrade to latest Eigen and fix issues introduced during rebase. * Fix breaking changes brought in by rebase. * * remove redundant cuda/cudnn version functionality * minimize unnecessary diffs against master branch to ease review/merge * Address code review comments: * ensure configure generated platform.bzl values default to empty * fix cudnn default version * Fix syntax error introduced during rebase. * Address code review comments - conform to style and add comments. * Handle empty library versions. * Default to symlinked versions of CUDA libraries when no version is specified. * Add cudnn.h header search path accidentally lost during rebase.
-rwxr-xr-xconfigure71
-rw-r--r--tensorflow/core/common_runtime/gpu/gpu_tracer.cc2
-rw-r--r--tensorflow/core/platform/default/build_config.bzl11
-rw-r--r--tensorflow/core/platform/default/build_config/BUILD29
-rw-r--r--tensorflow/core/platform/load_library.cc18
-rw-r--r--tensorflow/core/platform/load_library.h3
-rw-r--r--tensorflow/stream_executor/cuda/cuda_diagnostics.cc101
-rw-r--r--tensorflow/stream_executor/cuda/cuda_diagnostics.h6
-rw-r--r--tensorflow/stream_executor/cuda/cuda_dnn.cc3
-rw-r--r--tensorflow/stream_executor/cuda/cuda_gpu_executor.cc18
-rw-r--r--tensorflow/stream_executor/dso_loader.cc73
-rw-r--r--tensorflow/stream_executor/dso_loader.h5
-rw-r--r--tensorflow/stream_executor/lib/static_threadlocal.h2
-rw-r--r--third_party/gpus/crosstool/BUILD14
-rw-r--r--third_party/gpus/crosstool/CROSSTOOL92
-rwxr-xr-xthird_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc2
-rw-r--r--third_party/gpus/cuda/BUILD87
-rwxr-xr-xthird_party/gpus/cuda/cuda_config.sh99
-rw-r--r--third_party/gpus/cuda/platform.bzl57
19 files changed, 554 insertions, 139 deletions
diff --git a/configure b/configure
index 0a7d697c40..0a43b31468 100755
--- a/configure
+++ b/configure
@@ -78,6 +78,8 @@ done
# Find out where the CUDA toolkit is installed
+OSNAME=`uname -s`
+
while true; do
# Configure the Cuda SDK version to use.
if [ -z "$TF_CUDA_VERSION" ]; then
@@ -93,15 +95,24 @@ while true; do
CUDA_TOOLKIT_PATH=$default_cuda_path
fi
fi
+
if [[ -z "$TF_CUDA_VERSION" ]]; then
TF_CUDA_EXT=""
else
TF_CUDA_EXT=".$TF_CUDA_VERSION"
fi
- if [ -e $CUDA_TOOLKIT_PATH/lib64/libcudart.so$TF_CUDA_EXT ]; then
+
+ if [ "$OSNAME" == "Linux" ]; then
+ CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
+ elif [ "$OSNAME" == "Darwin" ]; then
+ CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
+ fi
+
+ if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
break
fi
- echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. $CUDA_TOOLKIT_PATH/lib64/libcudart.so$TF_CUDA_EXT cannot be found"
+ echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
+
if [ -z "$fromuser" ]; then
exit 1
fi
@@ -127,25 +138,41 @@ while true; do
fi
# Result returned from "read" will be used unexpanded. That make "~" unuseable.
# Going through one more level of expansion to handle that.
- CUDNN_INSTALL_PATH=$(bash -c "readlink -f $CUDNN_INSTALL_PATH")
+ CUDNN_INSTALL_PATH=`${PYTHON_BIN_PATH} -c "import os; print(os.path.realpath(os.path.expanduser('${CUDNN_INSTALL_PATH}')))"`
fi
+
if [[ -z "$TF_CUDNN_VERSION" ]]; then
TF_CUDNN_EXT=""
else
TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
fi
- if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so${TF_CUDNN_EXT}" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so${TF_CUDNN_EXT}" ]; then
- break
+
+ if [ "$OSNAME" == "Linux" ]; then
+ CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
+ CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
+ elif [ "$OSNAME" == "Darwin" ]; then
+ CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
+ CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
fi
- CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
- if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
- CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
+
+ if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
break
fi
- echo "Invalid path to cuDNN ${TF_CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
- echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so${TF_CUDNN_EXT}"
- echo "$CUDNN_INSTALL_PATH/libcudnn.so${TF_CUDNN_EXT}"
- echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}"
+
+ if [ "$OSNAME" == "Linux" ]; then
+ CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
+ if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
+ CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
+ break
+ fi
+ fi
+ echo "Invalid path to cuDNN ${CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
+ echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_PATH}"
+ echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_ALT_PATH}"
+ if [ "$OSNAME" == "Linux" ]; then
+ echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}"
+ fi
+
if [ -z "$fromuser" ]; then
exit 1
fi
@@ -157,18 +184,16 @@ done
cat > third_party/gpus/cuda/cuda.config <<EOF
# CUDA_TOOLKIT_PATH refers to the CUDA toolkit.
CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
-
# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
# files can be either in this directory, or under include/ and lib64/
# directories separately.
CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
# The Cuda SDK version that should be used in this build (empty to use libcudart.so symlink)
-TF_CUDA_VERSION=$TF_CUDA_EXT
-
-# The Cudnn version that should be used in this build (empty to use libcudnn.so symlink)
-TF_CUDNN_VERSION=$TF_CUDNN_EXT
+TF_CUDA_VERSION=$TF_CUDA_VERSION
+# The Cudnn version that should be used in this build
+TF_CUDNN_VERSION=$TF_CUDNN_VERSION
EOF
# Configure the gcc host compiler to use
@@ -176,13 +201,17 @@ export WARNING=$DO_NOT_SUBMIT_WARNING
perl -pi -e "s,CPU_COMPILER = \('.*'\),# \$ENV{WARNING}\nCPU_COMPILER = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
perl -pi -e "s,GCC_HOST_COMPILER_PATH = \('.*'\),# \$ENV{WARNING}\nGCC_HOST_COMPILER_PATH = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
+# Configure the platform name.
+perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/gpus/cuda/platform.bzl
+
# Configure the Cuda toolkit version to work with.
-perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_EXT\",s" tensorflow/core/platform/default/build_config.bzl
-perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_EXT\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_VERSION\",s" third_party/gpus/cuda/platform.bzl
# Configure the Cudnn version to work with.
-perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_EXT\",s" tensorflow/core/platform/default/build_config.bzl
-perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_EXT\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
+perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_VERSION\",s" third_party/gpus/cuda/platform.bzl
+
# Configure the compute capabilities that TensorFlow builds for.
# Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
diff --git a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
index 30d59fe7ba..08a3a5962a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
@@ -257,7 +257,7 @@ CUPTIManager *GetCUPTIManager() {
// TODO(pbar) Move this to platform specific header file?
// Static thread local variable for POD types.
#define TF_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
- static thread_local _Type_ s_obj_##_var_; \
+ static __thread _Type_ s_obj_##_var_; \
namespace { \
class ThreadLocal_##_var_ { \
public: \
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 633441f31b..6b3d85ded4 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -3,11 +3,6 @@
load("//google/protobuf:protobuf.bzl", "cc_proto_library")
load("//google/protobuf:protobuf.bzl", "py_proto_library")
-# configure may change the following lines to '.X.Y' or similar
-CUDA_VERSION = ""
-
-CUDNN_VERSION = ""
-
# Appends a suffix to a list of deps.
def tf_deps(deps, suffix):
tf_deps = []
@@ -96,9 +91,3 @@ def tf_additional_test_srcs():
def tf_kernel_tests_linkstatic():
return 0
-
-def tf_get_cuda_version():
- return CUDA_VERSION
-
-def tf_get_cudnn_version():
- return CUDNN_VERSION
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index da86245cc1..994b6a3a3c 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -9,7 +9,7 @@ exports_files(["LICENSE"])
load("//tensorflow:tensorflow.bzl", "tf_copts")
load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_get_cuda_version")
+load("/third_party/gpus/cuda/platform", "cuda_library_path")
cc_library(
name = "gtest",
@@ -31,7 +31,16 @@ tf_cuda_library(
name = "stream_executor",
deps = [
"//tensorflow/stream_executor",
- ],
+ ] + select({
+ "//third_party/gpus/cuda:darwin": ["IOKit"],
+ "//conditions:default": []
+ }),
+)
+
+# OSX framework for device driver access
+cc_library(
+ name = "IOKit",
+ linkopts = ["-framework IOKit"],
)
cc_library(
@@ -69,12 +78,18 @@ filegroup(
cc_library(
name = "cuda",
data = [
- "//third_party/gpus/cuda:lib64/libcudart.so" + tf_get_cuda_version(),
- ],
- linkopts = [
- "-Wl,-rpath,third_party/gpus/cuda/lib64",
- "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64",
+ "//third_party/gpus/cuda:{}".format(cuda_library_path("cudart")),
],
+ linkopts = select({
+ "//third_party/gpus/cuda:darwin": [
+ "-Wl,-rpath,third_party/gpus/cuda/lib",
+ "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib"
+ ],
+ "//conditions:default": [
+ "-Wl,-rpath,third_party/gpus/cuda/lib64",
+ "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64"
+ ]
+ }),
deps = [
"//third_party/gpus/cuda:cudart",
],
diff --git a/tensorflow/core/platform/load_library.cc b/tensorflow/core/platform/load_library.cc
index b8a93906f1..24fdcfd1fc 100644
--- a/tensorflow/core/platform/load_library.cc
+++ b/tensorflow/core/platform/load_library.cc
@@ -49,6 +49,24 @@ Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
return Status::OK();
}
+string FormatLibraryFileName(const string& name, const string& version) {
+ string filename;
+#if defined(__APPLE__)
+ if (version.size() == 0) {
+ filename = "lib" + name + ".dylib";
+ } else {
+ filename = "lib" + name + "." + version + ".dylib";
+ }
+#else
+ if (version.size() == 0) {
+ filename = "lib" + name + ".so";
+ } else {
+ filename = "lib" + name + ".so" + "." + version;
+ }
+#endif
+ return filename;
+}
+
} // namespace internal
} // namespace tensorflow
diff --git a/tensorflow/core/platform/load_library.h b/tensorflow/core/platform/load_library.h
index b67e8835c5..96c3cab156 100644
--- a/tensorflow/core/platform/load_library.h
+++ b/tensorflow/core/platform/load_library.h
@@ -25,6 +25,9 @@ namespace internal {
Status LoadLibrary(const char* library_filename, void** handle);
Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
void** symbol);
+// Return the filename of a dynamically linked library formatted according to
+// platform naming conventions
+string FormatLibraryFileName(const string& name, const string& version);
} // namespace internal
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 87dd42063c..f24bedc61d 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -17,13 +17,18 @@ limitations under the License.
#include <dirent.h>
#include <limits.h>
-#include <link.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#ifdef __APPLE__
+#include <IOKit/kext/KextManager.h>
+#include <mach-o/dyld.h>
+#else
+#include <link.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#endif
#include <unistd.h>
#include <algorithm>
#include <memory>
@@ -45,10 +50,15 @@ namespace perftools {
namespace gputools {
namespace cuda {
+#ifdef __APPLE__
+static const CFStringRef kDriverKextIdentifier = CFSTR("com.nvidia.CUDA");
+#else
static const char *kDriverVersionPath = "/proc/driver/nvidia/version";
+#endif
+
string DriverVersionToString(DriverVersion version) {
- return port::Printf("%d.%d", std::get<0>(version), std::get<1>(version));
+ return port::Printf("%d.%d.%d", std::get<0>(version), std::get<1>(version), std::get<2>(version));
}
string DriverVersionStatusToString(port::StatusOr<DriverVersion> version) {
@@ -61,15 +71,16 @@ string DriverVersionStatusToString(port::StatusOr<DriverVersion> version) {
port::StatusOr<DriverVersion> StringToDriverVersion(const string &value) {
std::vector<string> pieces = port::Split(value, '.');
- if (pieces.size() != 2) {
+ if (pieces.size() != 2 && pieces.size() != 3) {
return port::Status{
port::error::INVALID_ARGUMENT,
- port::Printf("expected %%d.%%d form for driver version; got \"%s\"",
+ port::Printf("expected %%d.%%d or %%d.%%d.%%d form for driver version; got \"%s\"",
value.c_str())};
}
int major;
int minor;
+ int patch = 0;
if (!port::safe_strto32(pieces[0], &major)) {
return port::Status{
port::error::INVALID_ARGUMENT,
@@ -84,8 +95,15 @@ port::StatusOr<DriverVersion> StringToDriverVersion(const string &value) {
"integer from string \"%s\"",
pieces[1].c_str(), value.c_str())};
}
+ if (pieces.size() == 3 && !port::safe_strto32(pieces[2], &patch)) {
+ return port::Status{
+ port::error::INVALID_ARGUMENT,
+ port::Printf("could not parse patch version number \"%s\" as an "
+ "integer from string \"%s\"",
+ pieces[2].c_str(), value.c_str())};
+ }
- DriverVersion result{major, minor};
+ DriverVersion result{major, minor, patch};
VLOG(2) << "version string \"" << value << "\" made value "
<< DriverVersionToString(result);
return result;
@@ -98,6 +116,26 @@ string Diagnostician::GetDevNodePath(int dev_node_ordinal) {
}
void Diagnostician::LogDiagnosticInformation() {
+#ifdef __APPLE__
+ CFStringRef kext_ids[1];
+ kext_ids[0] = kDriverKextIdentifier;
+ CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void**)kext_ids, 1, &kCFTypeArrayCallBacks);
+ CFDictionaryRef kext_infos = KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+ CFRelease(kext_id_query);
+
+ CFDictionaryRef cuda_driver_info = nullptr;
+ if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) {
+ bool started = CFBooleanGetValue((CFBooleanRef)CFDictionaryGetValue(cuda_driver_info, CFSTR("OSBundleStarted")));
+ if (!started) {
+ LOG(INFO) << "kernel driver is installed, but does not appear to be running on this host "
+ << "(" << port::Hostname() << ")";
+ }
+ } else {
+ LOG(INFO) << "kernel driver does not appear to be installed on this host "
+ << "(" << port::Hostname() << ")";
+ }
+ CFRelease(kext_infos);
+#else
if (access(kDriverVersionPath, F_OK) != 0) {
LOG(INFO) << "kernel driver does not appear to be running on this host "
<< "(" << port::Hostname() << "): "
@@ -110,6 +148,7 @@ void Diagnostician::LogDiagnosticInformation() {
<< " does not exist";
return;
}
+#endif
LOG(INFO) << "retrieving CUDA diagnostic information for host: "
<< port::Hostname();
@@ -149,9 +188,13 @@ void Diagnostician::LogDiagnosticInformation() {
port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion();
LOG(INFO) << "kernel reported version is: "
<< DriverVersionStatusToString(kernel_version);
+
+ // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__)
if (kernel_version.ok() && dso_version.ok()) {
WarnOnDsoKernelMismatch(dso_version, kernel_version);
}
+#endif
}
// Iterates through loaded DSOs with DlIteratePhdrCallback to find the
@@ -161,6 +204,29 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
port::error::NOT_FOUND,
"was unable to find libcuda.so DSO loaded into this program"}};
+#if defined(__APPLE__)
+ // OSX CUDA libraries have names like: libcuda_310.41.15_mercury.dylib
+ const string prefix("libcuda_");
+ const string suffix("_mercury.dylib");
+ for (uint32_t image_index = 0; image_index < _dyld_image_count(); ++image_index) {
+ const string path(_dyld_get_image_name(image_index));
+ const size_t suffix_pos = path.rfind(suffix);
+ const size_t prefix_pos = path.rfind(prefix, suffix_pos);
+ if (prefix_pos == string::npos ||
+ suffix_pos == string::npos) {
+ // no match
+ continue;
+ }
+ const size_t start = prefix_pos + prefix.size();
+ if (start >= suffix_pos) {
+ // version not included
+ continue;
+ }
+ const size_t length = suffix_pos - start;
+ const string version = path.substr(start, length);
+ result = StringToDriverVersion(version);
+ }
+#else
// Callback used when iterating through DSOs. Looks for the driver-interfacing
// DSO and yields its version number into the callback data, when found.
auto iterate_phdr =
@@ -192,6 +258,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
};
dl_iterate_phdr(iterate_phdr, &result);
+#endif
return result;
}
@@ -236,6 +303,29 @@ void Diagnostician::WarnOnDsoKernelMismatch(
port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
+#if defined(__APPLE__)
+ CFStringRef kext_ids[1];
+ kext_ids[0] = kDriverKextIdentifier;
+ CFArrayRef kext_id_query = CFArrayCreate(nullptr, (const void**)kext_ids, 1, &kCFTypeArrayCallBacks);
+ CFDictionaryRef kext_infos = KextManagerCopyLoadedKextInfo(kext_id_query, nullptr);
+ CFRelease(kext_id_query);
+
+ CFDictionaryRef cuda_driver_info = nullptr;
+ if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) {
+ // NOTE: OSX CUDA driver does not currently store the same driver version
+ // in kCFBundleVersionKey as is returned by cuDriverGetVersion
+ const char * version = CFStringGetCStringPtr((CFStringRef)CFDictionaryGetValue(cuda_driver_info, kCFBundleVersionKey), kCFStringEncodingUTF8);
+ CFRelease(kext_infos);
+ return StringToDriverVersion(version);
+ }
+ CFRelease(kext_infos);
+ auto status =
+ port::Status{port::error::INTERNAL,
+ port::StrCat("failed to read driver bundle version: ",
+ CFStringGetCStringPtr(kDriverKextIdentifier, kCFStringEncodingUTF8))
+ };
+ return status;
+#else
FILE *driver_version_file = fopen(kDriverVersionPath, "r");
if (driver_version_file == nullptr) {
return port::Status{
@@ -267,6 +357,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
ferror(driver_version_file))};
fclose(driver_version_file);
return status;
+#endif
}
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.h b/tensorflow/stream_executor/cuda/cuda_diagnostics.h
index 42336c337f..e98d32f286 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.h
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.h
@@ -26,8 +26,8 @@ namespace perftools {
namespace gputools {
namespace cuda {
-// e.g. DriverVersion{331, 79}
-using DriverVersion = std::tuple<int, int>;
+// e.g. DriverVersion{346, 3, 4}
+using DriverVersion = std::tuple<int, int, int>;
// Converts a parsed driver version to string form.
string DriverVersionToString(DriverVersion version);
@@ -72,8 +72,6 @@ class Diagnostician {
static void LogDriverVersionInformation();
private:
- // Logs information about the loaded nvidia-related kernel modules.
- static void LogKernelModuleInformation();
// Given the DSO version number and the driver version file contents, extracts
// the driver version and compares, warning the user in the case of
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index f21065690c..7034822356 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -302,10 +302,13 @@ port::Status CudnnSupport::Init() {
} else {
const auto& version = result.ValueOrDie();
LOG(INFO) << "running driver version: " << DriverVersionToString(version);
+ // OS X kernel driver does not report version accurately
+#if !defined(__APPLE__)
if (std::get<0>(version) < 340) {
LOG(ERROR)
<< "cudnn library is only supported on 340.XX+ driver versions";
}
+#endif
}
}
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index f98eec3b33..b343af0fce 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -15,6 +15,9 @@ limitations under the License.
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
#include <unistd.h>
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
@@ -194,7 +197,15 @@ bool CUDAExecutor::FindOnDiskForComputeCapability(
// would return /usr/bin.
static string GetBinaryDir(bool strip_exe) {
char exe_path[PATH_MAX] = {0};
- CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#if defined(__APPLE__)
+ uint32_t buffer_size = 0U;
+ _NSGetExecutablePath(nullptr, &buffer_size);
+ char unresolved_path[buffer_size];
+ _NSGetExecutablePath(unresolved_path, &buffer_size);
+ CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
+#else
+ CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#endif
// Make sure it's null-terminated:
exe_path[sizeof(exe_path) - 1] = 0;
@@ -868,6 +879,10 @@ CUcontext CUDAExecutor::cuda_context() { return context_; }
// For anything more complicated/prod-focused than this, you'll likely want to
// turn to gsys' topology modeling.
static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
+#if defined(__APPLE__)
+ LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
+ return 0;
+#else
VLOG(2) << "trying to read NUMA node for device ordinal: " << device_ordinal;
static const int kUnknownNumaNode = -1;
@@ -910,6 +925,7 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
<< content;
return kUnknownNumaNode;
+#endif
}
// Set of compute capability specific device parameters that cannot be
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index caf10a9003..bf7faef209 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -17,12 +17,15 @@ limitations under the License.
#include <dlfcn.h>
#include <limits.h>
+#if defined(__APPLE__)
+#include <mach-o/dyld.h>
+#endif
#include <stdlib.h>
#include <unistd.h>
#include <initializer_list>
-#include "tensorflow/stream_executor/platform/port.h"
#include <vector>
+#include "tensorflow/core/platform/load_library.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/str_util.h"
#include "tensorflow/stream_executor/lib/strcat.h"
@@ -41,8 +44,8 @@ string GetCudaVersion() { return ""; }
string GetCudnnVersion() { return ""; }
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath("libcublas.so" + GetCudaVersion(),
- "third_party/gpus/cuda/lib64"),
+ return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cublas", GetCudaVersion()),
+ GetCudaLibraryDirPath()),
dso_handle);
}
@@ -51,33 +54,33 @@ string GetCudnnVersion() { return ""; }
// different version number than other CUDA libraries. See b/22397368 for
// some details about the complications surrounding this.
return GetDsoHandle(
- FindDsoPath("libcudnn.so" + GetCudnnVersion(),
- "third_party/gpus/cuda/lib64"),
- dso_handle);
+ FindDsoPath(tensorflow::internal::FormatLibraryFileName("cudnn", GetCudnnVersion()),
+ GetCudaLibraryDirPath()),
+ dso_handle);
}
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath("libcufft.so" + GetCudaVersion(),
- "third_party/gpus/cuda/lib64"),
+ return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cufft", GetCudaVersion()),
+ GetCudaLibraryDirPath()),
dso_handle);
}
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath("libcurand.so" + GetCudaVersion(),
- "third_party/gpus/cuda/lib64"),
+ return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("curand", GetCudaVersion()),
+ GetCudaLibraryDirPath()),
dso_handle);
}
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
- return GetDsoHandle(
- FindDsoPath("libcuda.so.1", "third_party/gpus/cuda/driver/lib64"),
- dso_handle);
+ return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", ""),
+ GetCudaDriverLibraryPath()),
+ dso_handle);
}
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
return GetDsoHandle(
- FindDsoPath("libcupti.so" + GetCudaVersion(),
- "third_party/gpus/cuda/extras/CUPTI/lib64"),
+ FindDsoPath(tensorflow::internal::FormatLibraryFileName("cupti", GetCudaVersion()),
+ GetCudaCuptiLibraryPath()),
dso_handle);
}
@@ -109,7 +112,15 @@ string GetCudnnVersion() { return ""; }
/* static */ string DsoLoader::GetBinaryDirectory(bool strip_executable_name) {
char exe_path[PATH_MAX] = {0};
+#ifdef __APPLE__
+ uint32_t buffer_size(0U);
+ _NSGetExecutablePath(nullptr, &buffer_size);
+ char unresolved_path[buffer_size];
+ _NSGetExecutablePath(unresolved_path, &buffer_size);
+ CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
+#else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#endif
// Make sure it's null-terminated:
exe_path[sizeof(exe_path) - 1] = 0;
@@ -126,8 +137,11 @@ string GetCudnnVersion() { return ""; }
// Ownership is transferred to the caller.
static std::vector<string>* CreatePrimordialRpaths() {
auto rpaths = new std::vector<string>;
- rpaths->push_back(
- "driver/driver_sh.runfiles/third_party/gpus/cuda/lib64");
+#if defined(__APPLE__)
+ rpaths->push_back("driver/driver_sh.runfiles/third_party/gpus/cuda/lib");
+#else
+ rpaths->push_back("driver/driver_sh.runfiles/third_party/gpus/cuda/lib64");
+#endif
return rpaths;
}
@@ -175,6 +189,31 @@ static std::vector<string>* CreatePrimordialRpaths() {
return library_name.ToString();
}
+/* static */ string DsoLoader::GetCudaLibraryDirPath() {
+#if defined(__APPLE__)
+ return "third_party/gpus/cuda/lib";
+#else
+ return "third_party/gpus/cuda/lib64";
+#endif
+}
+
+/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
+#if defined(__APPLE__)
+ return "third_party/gpus/cuda/driver/lib";
+#else
+ return "third_party/gpus/cuda/driver/lib64";
+#endif
+}
+
+/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
+#if defined(__APPLE__)
+ return "third_party/gpus/cuda/extras/CUPTI/lib";
+#else
+ return "third_party/gpus/cuda/extras/CUPTI/lib64";
+#endif
+}
+
+
// -- CachedDsoLoader
/* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h
index ba1690c320..2afbc294df 100644
--- a/tensorflow/stream_executor/dso_loader.h
+++ b/tensorflow/stream_executor/dso_loader.h
@@ -91,6 +91,11 @@ class DsoLoader {
static string FindDsoPath(port::StringPiece library_name,
port::StringPiece runfiles_relpath);
+ // Return platform dependent paths for DSOs
+ static string GetCudaLibraryDirPath();
+ static string GetCudaDriverLibraryPath();
+ static string GetCudaCuptiLibraryPath();
+
SE_DISALLOW_COPY_AND_ASSIGN(DsoLoader);
};
diff --git a/tensorflow/stream_executor/lib/static_threadlocal.h b/tensorflow/stream_executor/lib/static_threadlocal.h
index 7098da3453..25d97ae000 100644
--- a/tensorflow/stream_executor/lib/static_threadlocal.h
+++ b/tensorflow/stream_executor/lib/static_threadlocal.h
@@ -18,7 +18,7 @@ limitations under the License.
// For POD types in TLS mode, s_obj_VAR is the thread-local variable.
#define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
- static thread_local _Type_ s_obj_##_var_; \
+ static __thread _Type_ s_obj_##_var_; \
namespace { \
class ThreadLocal_##_var_ { \
public: \
diff --git a/third_party/gpus/crosstool/BUILD b/third_party/gpus/crosstool/BUILD
index eac4dc7fad..7c9c8ab884 100644
--- a/third_party/gpus/crosstool/BUILD
+++ b/third_party/gpus/crosstool/BUILD
@@ -22,6 +22,20 @@ cc_toolchain(
supports_param_files = 0,
)
+cc_toolchain(
+ name = "cc-compiler-darwin",
+ all_files = ":empty",
+ compiler_files = ":empty",
+ cpu = "darwin",
+ dwp_files = ":empty",
+ dynamic_runtime_libs = [":empty"],
+ linker_files = ":empty",
+ objcopy_files = ":empty",
+ static_runtime_libs = [":empty"],
+ strip_files = ":empty",
+ supports_param_files = 0,
+)
+
filegroup(
name = "empty",
srcs = [],
diff --git a/third_party/gpus/crosstool/CROSSTOOL b/third_party/gpus/crosstool/CROSSTOOL
index a9f26f5710..8db81a9603 100644
--- a/third_party/gpus/crosstool/CROSSTOOL
+++ b/third_party/gpus/crosstool/CROSSTOOL
@@ -150,3 +150,95 @@ toolchain {
}
linking_mode_flags { mode: DYNAMIC }
}
+
+toolchain {
+ abi_version: "local"
+ abi_libc_version: "local"
+ builtin_sysroot: ""
+ compiler: "compiler"
+ host_system_name: "local"
+ needsPic: true
+ target_libc: "macosx"
+ target_cpu: "darwin"
+ target_system_name: "local"
+ toolchain_identifier: "local_darwin"
+
+ tool_path { name: "ar" path: "/usr/bin/libtool" }
+ tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+ tool_path { name: "cpp" path: "/usr/bin/cpp" }
+ tool_path { name: "dwp" path: "/usr/bin/dwp" }
+ tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
+ cxx_flag: "-std=c++11"
+ ar_flag: "-static"
+ ar_flag: "-s"
+ ar_flag: "-o"
+ linker_flag: "-lc++"
+ linker_flag: "-undefined"
+ linker_flag: "dynamic_lookup"
+ # TODO(ulfjack): This is wrong on so many levels. Figure out a way to auto-detect the proper
+ # setting from the local compiler, and also how to make incremental builds correct.
+ cxx_builtin_include_directory: "/"
+ tool_path { name: "gcov" path: "/usr/bin/gcov" }
+ tool_path { name: "ld" path: "/usr/bin/ld" }
+ tool_path { name: "nm" path: "/usr/bin/nm" }
+ tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+ objcopy_embed_flag: "-I"
+ objcopy_embed_flag: "binary"
+ tool_path { name: "objdump" path: "/usr/bin/objdump" }
+ tool_path { name: "strip" path: "/usr/bin/strip" }
+
+ # Anticipated future default.
+ unfiltered_cxx_flag: "-no-canonical-prefixes"
+ # Make C++ compilation deterministic. Use linkstamping instead of these
+ # compiler symbols.
+ unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+ unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+
+ # Security hardening on by default.
+ # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+ compiler_flag: "-D_FORTIFY_SOURCE=1"
+ compiler_flag: "-fstack-protector"
+
+ # Enable coloring even if there's no attached terminal. Bazel removes the
+ # escape sequences if --nocolor is specified.
+ compiler_flag: "-fcolor-diagnostics"
+
+ # All warnings are enabled. Maybe enable -Werror as well?
+ compiler_flag: "-Wall"
+ # Enable a few more warnings that aren't part of -Wall.
+ compiler_flag: "-Wthread-safety"
+ compiler_flag: "-Wself-assign"
+
+ # Keep stack frames for debugging, even in opt mode.
+ compiler_flag: "-fno-omit-frame-pointer"
+
+ # Anticipated future default.
+ linker_flag: "-no-canonical-prefixes"
+
+ compilation_mode_flags {
+ mode: DBG
+ # Enable debug symbols.
+ compiler_flag: "-g"
+ }
+ compilation_mode_flags {
+ mode: OPT
+ # No debug symbols.
+ # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or even generally?
+ # However, that can't happen here, as it requires special handling in Bazel.
+ compiler_flag: "-g0"
+
+ # Conservative choice for -O
+ # -O3 can increase binary size and even slow down the resulting binaries.
+ # Profile first and / or use FDO if you need better performance than this.
+ compiler_flag: "-O2"
+
+ # Disable assertions
+ compiler_flag: "-DNDEBUG"
+
+ # Removal of unused code and data at link time (can this increase binary size in some cases?).
+ compiler_flag: "-ffunction-sections"
+ compiler_flag: "-fdata-sections"
+ }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
index 04ab50ca86..5f175efcf3 100755
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python2.7
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/third_party/gpus/cuda/BUILD b/third_party/gpus/cuda/BUILD
index 28b49b8cc7..792dbb4268 100644
--- a/third_party/gpus/cuda/BUILD
+++ b/third_party/gpus/cuda/BUILD
@@ -1,10 +1,11 @@
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
load("//tensorflow:tensorflow.bzl", "if_cuda")
-load("//tensorflow/core:platform/default/build_config.bzl",
- "tf_get_cuda_version",
- "tf_get_cudnn_version",
- )
+load("platform", "cuda_library_path")
+load("platform", "cuda_static_library_path")
+load("platform", "cudnn_library_path")
+load("platform", "cupti_library_path")
+load("platform", "readlink_command")
package(default_visibility = ["//visibility:public"])
@@ -31,6 +32,12 @@ config_setting(
},
)
+config_setting(
+ name = "darwin",
+ values = {"cpu": "darwin"},
+ visibility = ["//visibility:public"],
+)
+
cc_library(
name = "cuda_headers",
hdrs = glob([
@@ -43,24 +50,26 @@ cc_library(
cc_library(
name = "cudart_static",
srcs = [
- "lib64/libcudart_static.a",
+ cuda_static_library_path("cudart"),
],
includes = ["include/"],
linkopts = [
"-ldl",
- "-lrt",
"-lpthread",
- ],
+ ] + select({
+ "//tensorflow:darwin": [],
+ "//conditions:default": ["-lrt"]
+ }),
visibility = ["//visibility:public"],
)
cc_library(
name = "cudart",
srcs = [
- "lib64/libcudart.so" + tf_get_cuda_version(),
+ cuda_library_path("cudart")
],
data = [
- "lib64/libcudart.so" + tf_get_cuda_version(),
+ cuda_library_path("cudart")
],
includes = ["include/"],
visibility = ["//visibility:public"],
@@ -70,10 +79,10 @@ cc_library(
cc_library(
name = "cublas",
srcs = [
- "lib64/libcublas.so" + tf_get_cuda_version(),
+ cuda_library_path("cublas")
],
data = [
- "lib64/libcublas.so" + tf_get_cuda_version(),
+ cuda_library_path("cublas")
],
includes = ["include/"],
visibility = ["//visibility:public"],
@@ -83,10 +92,10 @@ cc_library(
cc_library(
name = "cudnn",
srcs = [
- "lib64/libcudnn.so" + tf_get_cudnn_version(),
+ cudnn_library_path()
],
data = [
- "lib64/libcudnn.so" + tf_get_cudnn_version(),
+ cudnn_library_path()
],
includes = ["include/"],
visibility = ["//visibility:public"],
@@ -96,10 +105,10 @@ cc_library(
cc_library(
name = "cufft",
srcs = [
- "lib64/libcufft.so" + tf_get_cuda_version(),
+ cuda_library_path("cufft")
],
data = [
- "lib64/libcufft.so" + tf_get_cuda_version(),
+ cuda_library_path("cufft")
],
includes = ["include/"],
visibility = ["//visibility:public"],
@@ -130,7 +139,7 @@ cc_library(
cc_library(
name = "cupti_dsos",
data = [
- "extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
+ cupti_library_path(),
],
visibility = ["//visibility:public"],
)
@@ -152,34 +161,34 @@ genrule(
"include/cublas.h",
"include/cudnn.h",
"extras/CUPTI/include/cupti.h",
- "lib64/libcudart_static.a",
- "lib64/libcublas.so" + tf_get_cuda_version(),
- "lib64/libcudnn.so" + tf_get_cudnn_version(),
- "lib64/libcudart.so" + tf_get_cuda_version(),
- "lib64/libcufft.so" + tf_get_cuda_version(),
- "extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
+ cuda_static_library_path("cudart"),
+ cuda_library_path("cublas"),
+ cudnn_library_path(),
+ cuda_library_path("cudart"),
+ cuda_library_path("cufft"),
+ cupti_library_path(),
],
cmd = if_cuda(
# Under cuda config, create all the symbolic links to the actual cuda files
- "OUTPUTDIR=`readlink -f $(@D)/../../..`; cd `dirname $(location :cuda_config.sh)`; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;",
+ "OUTPUTDIR=`{} -f $(@D)/../../..`; cd third_party/gpus/cuda; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;".format(readlink_command()),
# Under non-cuda config, create all dummy files to make the build go through
";".join([
- "mkdir -p $(@D)/include",
- "mkdir -p $(@D)/lib64",
- "mkdir -p $(@D)/extras/CUPTI/include",
- "mkdir -p $(@D)/extras/CUPTI/lib64",
- "touch $(@D)/include/cuda.h",
- "touch $(@D)/include/cublas.h",
- "touch $(@D)/include/cudnn.h",
- "touch $(@D)/extras/CUPTI/include/cupti.h",
- "touch $(@D)/lib64/libcudart_static.a",
- "touch $(@D)/lib64/libcublas.so" + tf_get_cuda_version(),
- "touch $(@D)/lib64/libcudnn.so" + tf_get_cudnn_version(),
- "touch $(@D)/lib64/libcudart.so" + tf_get_cuda_version(),
- "touch $(@D)/lib64/libcufft.so" + tf_get_cuda_version(),
- "touch $(@D)/extras/CUPTI/lib64/libcupti.so" + tf_get_cuda_version(),
- ]),
+ "mkdir -p $(@D)/include",
+ "mkdir -p $(@D)/lib64",
+ "mkdir -p $(@D)/extras/CUPTI/include",
+ "mkdir -p $(@D)/extras/CUPTI/lib64",
+ "touch $(@D)/include/cuda.h",
+ "touch $(@D)/include/cublas.h",
+ "touch $(@D)/include/cudnn.h",
+ "touch $(@D)/extras/CUPTI/include/cupti.h",
+ "touch $(@D)/{}".format(cuda_static_library_path("cudart")),
+ "touch $(@D)/{}".format(cuda_library_path("cublas")),
+ "touch $(@D)/{}".format(cudnn_library_path()),
+ "touch $(@D)/{}".format(cuda_library_path("cudart")),
+ "touch $(@D)/{}".format(cuda_library_path("cufft")),
+ "touch $(@D)/{}".format(cupti_library_path()),
+ ]),
),
local = 1,
)
@@ -191,7 +200,7 @@ genrule(
],
cmd = if_cuda(
# Under cuda config, create the symbolic link to the actual cuda.config
- "configfile=$(location :cuda.config); ln -sf `readlink -f $${configfile#*/*/*/}` $(@D)/;",
+ "configfile=$(location :cuda.config); ln -sf `{} -f $${{configfile#*/*/*/}}` $(@D)/;".format(readlink_command()),
# Under non-cuda config, create the dummy file
";".join([
diff --git a/third_party/gpus/cuda/cuda_config.sh b/third_party/gpus/cuda/cuda_config.sh
index e93a7ed741..0e1106bb70 100755
--- a/third_party/gpus/cuda/cuda_config.sh
+++ b/third_party/gpus/cuda/cuda_config.sh
@@ -54,7 +54,19 @@ source cuda.config || exit -1
OUTPUTDIR=${OUTPUTDIR:-../../..}
CUDA_TOOLKIT_PATH=${CUDA_TOOLKIT_PATH:-/usr/local/cuda}
-CUDNN_INSTALL_PATH=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
+CUDNN_INSTALL_BASEDIR=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
+
+if [[ -z "$TF_CUDA_VERSION" ]]; then
+ TF_CUDA_EXT=""
+else
+ TF_CUDA_EXT=".$TF_CUDA_VERSION"
+fi
+
+if [[ -z "$TF_CUDNN_VERSION" ]]; then
+ TF_CUDNN_EXT=""
+else
+ TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
+fi
# An error message when the Cuda toolkit is not found
function CudaError {
@@ -99,59 +111,84 @@ function CheckAndLinkToSrcTree {
# Link the output file to the source tree, avoiding self links if they are
# the same. This could happen if invoked from the source tree by accident.
- if [ ! $(readlink -f $PWD) == $(readlink -f $OUTPUTDIR/third_party/gpus/cuda) ]; then
+ if [ ! $($READLINK_CMD -f $PWD) == $($READLINK_CMD -f $OUTPUTDIR/third_party/gpus/cuda) ]; then
mkdir -p $(dirname $OUTPUTDIR/third_party/gpus/cuda/$FILE)
ln -sf $PWD/$FILE $OUTPUTDIR/third_party/gpus/cuda/$FILE
fi
}
+OSNAME=`uname -s`
+if [ "$OSNAME" == "Linux" ]; then
+ CUDA_LIB_PATH="lib64"
+ CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib64"
+ CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
+ CUDA_RT_LIB_STATIC_PATH="lib64/libcudart_static.a"
+ CUDA_BLAS_LIB_PATH="lib64/libcublas.so${TF_CUDA_EXT}"
+ CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
+ CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
+ CUDA_FFT_LIB_PATH="lib64/libcufft.so${TF_CUDA_EXT}"
+ CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib64/libcupti.so${TF_CUDA_EXT}"
+ READLINK_CMD="readlink"
+elif [ "$OSNAME" == "Darwin" ]; then
+ CUDA_LIB_PATH="lib"
+ CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib"
+ CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
+ CUDA_RT_LIB_STATIC_PATH="lib/libcudart_static.a"
+ CUDA_BLAS_LIB_PATH="lib/libcublas${TF_CUDA_EXT}.dylib"
+ CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
+ CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
+ CUDA_FFT_LIB_PATH="lib/libcufft${TF_CUDA_EXT}.dylib"
+ CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib/libcupti${TF_CUDA_EXT}.dylib"
+ READLINK_CMD="greadlink"
+fi
+
if [ "$CHECK_ONLY" == "1" ]; then
CheckAndLinkToSrcTree CudaError include/cuda.h
CheckAndLinkToSrcTree CudaError include/cublas.h
CheckAndLinkToSrcTree CudnnError include/cudnn.h
CheckAndLinkToSrcTree CudaError extras/CUPTI/include/cupti.h
- CheckAndLinkToSrcTree CudaError lib64/libcudart_static.a
- CheckAndLinkToSrcTree CudaError lib64/libcublas.so$TF_CUDA_VERSION
- CheckAndLinkToSrcTree CudnnError lib64/libcudnn.so$TF_CUDNN_VERSION
- CheckAndLinkToSrcTree CudaError lib64/libcudart.so$TF_CUDA_VERSION
- CheckAndLinkToSrcTree CudaError lib64/libcufft.so$TF_CUDA_VERSION
- CheckAndLinkToSrcTree CudaError extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION
+ CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_STATIC_PATH
+ CheckAndLinkToSrcTree CudaError $CUDA_BLAS_LIB_PATH
+ CheckAndLinkToSrcTree CudnnError $CUDA_DNN_LIB_PATH
+ CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_PATH
+ CheckAndLinkToSrcTree CudaError $CUDA_FFT_LIB_PATH
+ CheckAndLinkToSrcTree CudaError $CUDA_CUPTI_LIB_PATH
exit 0
fi
# Actually configure the source tree for TensorFlow's canonical view of Cuda
# libraries.
-if test ! -e ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so$TF_CUDA_VERSION; then
- CudaError "cannot find ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so$TF_CUDA_VERSION"
+if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}; then
+ CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}"
fi
-if test ! -e ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION; then
- CudaError "cannot find ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64/libcupti.so$TF_CUDA_VERSION"
+if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}; then
+ CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}"
fi
-if test ! -d ${CUDNN_INSTALL_PATH}; then
- CudnnError "cannot find dir: ${CUDNN_INSTALL_PATH}"
+if test ! -d ${CUDNN_INSTALL_BASEDIR}; then
+ CudnnError "cannot find dir: ${CUDNN_INSTALL_BASEDIR}"
fi
# Locate cudnn.h
-if test -e ${CUDNN_INSTALL_PATH}/cudnn.h; then
- CUDNN_HEADER_PATH=${CUDNN_INSTALL_PATH}
-elif test -e ${CUDNN_INSTALL_PATH}/include/cudnn.h; then
- CUDNN_HEADER_PATH=${CUDNN_INSTALL_PATH}/include
+if test -e ${CUDNN_INSTALL_BASEDIR}/cudnn.h; then
+ CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}
+elif test -e ${CUDNN_INSTALL_BASEDIR}/include/cudnn.h; then
+ CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}/include
elif test -e /usr/include/cudnn.h; then
- CUDNN_HEADER_PATH=/usr/include
+ CUDNN_HEADER_DIR=/usr/include
else
- CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_PATH} or /usr/include"
+ CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_BASEDIR}"
fi
-# Locate libcudnn.so.${$TF_CUDNN_VERSION}
-if test -e ${CUDNN_INSTALL_PATH}/libcudnn.so$TF_CUDNN_VERSION; then
- CUDNN_LIB_PATH=${CUDNN_INSTALL_PATH}
-elif test -e ${CUDNN_INSTALL_PATH}/lib64/libcudnn.so$TF_CUDNN_VERSION; then
- CUDNN_LIB_PATH=${CUDNN_INSTALL_PATH}/lib64
+# Locate libcudnn
+if test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}; then
+ CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}
+elif test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}; then
+ CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}
else
- CudnnError "cannot find libcudnn.so.$TF_CUDNN_VERSION under: ${CUDNN_INSTALL_PATH}"
+ CudnnError "cannot find ${CUDA_DNN_LIB_PATH} or ${CUDA_DNN_LIB_ALT_PATH} under: ${CUDNN_INSTALL_BASEDIR}"
fi
# Helper function to build symbolic links for all files under a directory.
@@ -181,8 +218,8 @@ function LinkAllFiles {
mkdir -p $OUTPUTDIR/third_party/gpus/cuda
echo "Setting up Cuda include"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/include $OUTPUTDIR/third_party/gpus/cuda/include || exit -1
-echo "Setting up Cuda lib64"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/lib64 $OUTPUTDIR/third_party/gpus/cuda/lib64 || exit -1
+echo "Setting up Cuda ${CUDA_LIB_PATH}"
+LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_LIB_PATH} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_LIB_PATH} || exit -1
echo "Setting up Cuda bin"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/bin $OUTPUTDIR/third_party/gpus/cuda/bin || exit -1
echo "Setting up Cuda nvvm"
@@ -190,8 +227,8 @@ LinkAllFiles ${CUDA_TOOLKIT_PATH}/nvvm $OUTPUTDIR/third_party/gpus/cuda/nvvm ||
echo "Setting up CUPTI include"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/include $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/include || exit -1
echo "Setting up CUPTI lib64"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/lib64 $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/lib64 || exit -1
+LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_DIR} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_CUPTI_LIB_DIR} || exit -1
# Set up symbolic link for cudnn
-ln -sf $CUDNN_HEADER_PATH/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
-ln -sf $CUDNN_LIB_PATH/libcudnn.so$TF_CUDNN_VERSION $OUTPUTDIR/third_party/gpus/cuda/lib64/libcudnn.so$TF_CUDNN_VERSION || exit -1
+ln -sf $CUDNN_HEADER_DIR/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
+ln -sf $CUDNN_LIB_INSTALL_PATH $OUTPUTDIR/third_party/gpus/cuda/$CUDA_DNN_LIB_PATH || exit -1
diff --git a/third_party/gpus/cuda/platform.bzl b/third_party/gpus/cuda/platform.bzl
new file mode 100644
index 0000000000..06f3d0cff4
--- /dev/null
+++ b/third_party/gpus/cuda/platform.bzl
@@ -0,0 +1,57 @@
+CUDA_VERSION = ""
+CUDNN_VERSION = ""
+PLATFORM = ""
+
+def cuda_sdk_version():
+ return CUDA_VERSION
+
+def cudnn_sdk_version():
+ return CUDNN_VERSION
+
+def cuda_library_path(name, version = cuda_sdk_version()):
+ if PLATFORM == "Darwin":
+ if not version:
+ return "lib/lib{}.dylib".format(name)
+ else:
+ return "lib/lib{}.{}.dylib".format(name, version)
+ else:
+ if not version:
+ return "lib64/lib{}.so".format(name)
+ else:
+ return "lib64/lib{}.so.{}".format(name, version)
+
+def cuda_static_library_path(name):
+ if PLATFORM == "Darwin":
+ return "lib/lib{}_static.a".format(name)
+ else:
+ return "lib64/lib{}_static.a".format(name)
+
+def cudnn_library_path(version = cudnn_sdk_version()):
+ if PLATFORM == "Darwin":
+ if not version:
+ return "lib/libcudnn.dylib"
+ else:
+ return "lib/libcudnn.{}.dylib".format(version)
+ else:
+ if not version:
+ return "lib64/libcudnn.so"
+ else:
+ return "lib64/libcudnn.so.{}".format(version)
+
+def cupti_library_path(version = cuda_sdk_version()):
+ if PLATFORM == "Darwin":
+ if not version:
+ return "extras/CUPTI/lib/libcupti.dylib"
+ else:
+ return "extras/CUPTI/lib/libcupti.{}.dylib".format(version)
+ else:
+ if not version:
+ return "extras/CUPTI/lib64/libcupti.so"
+ else:
+ return "extras/CUPTI/lib64/libcupti.so.{}".format(version)
+
+def readlink_command():
+ if PLATFORM == "Darwin":
+ return "greadlink"
+ else:
+ return "readlink"