Diffstat (limited to 'third_party/gpus/cuda_configure.bzl')
 third_party/gpus/cuda_configure.bzl | 108
 1 file changed, 79 insertions(+), 29 deletions(-)
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index a2b3e7d79e..bbe0442eaf 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -5,6 +5,9 @@
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+ * `TF_CUDA_CLANG`: Whether to use clang as a CUDA compiler.
+ * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
+ both host and device code compilation if TF_CUDA_CLANG is 1.
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
`/usr/local/cuda`.
* `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
@@ -17,6 +20,7 @@
"""
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
@@ -35,19 +39,25 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
# BEGIN cc_configure common functions.
def find_cc(repository_ctx):
"""Find the C++ compiler."""
- cc_name = "gcc"
- if _GCC_HOST_COMPILER_PATH in repository_ctx.os.environ:
- cc_name = repository_ctx.os.environ[_GCC_HOST_COMPILER_PATH].strip()
- if not cc_name:
- cc_name = "gcc"
+ if _use_cuda_clang(repository_ctx):
+ target_cc_name = "clang"
+ cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+ else:
+ target_cc_name = "gcc"
+ cc_path_envvar = _GCC_HOST_COMPILER_PATH
+ cc_name = target_cc_name
+
+ if cc_path_envvar in repository_ctx.os.environ:
+ cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+ if cc_name_from_env:
+ cc_name = cc_name_from_env
if cc_name.startswith("/"):
# Absolute path, maybe we should make this supported by our which function.
return cc_name
cc = repository_ctx.which(cc_name)
if cc == None:
- fail(
- "Cannot find gcc, either correct your path or set the CC" +
- " environment variable")
+ fail(("Cannot find {}, either correct your path or set the {}" +
+ " environment variable").format(target_cc_name, cc_path_envvar))
return cc
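
Taken together, the fragments above mean find_cc now resolves the compiler in a fixed order: pick the target name from TF_CUDA_CLANG, let the matching compiler-path variable override it, accept absolute paths verbatim, and otherwise fall back to repository_ctx.which. A minimal Starlark sketch of that resolution order, consolidated for readability (illustrative only; the hypothetical _resolve_compiler helper is not part of the patch):

    def _resolve_compiler(repository_ctx):
      # Hypothetical helper mirroring what find_cc does after this change.
      use_clang = repository_ctx.os.environ.get("TF_CUDA_CLANG", "").strip() == "1"
      target_cc_name = "clang" if use_clang else "gcc"
      cc_path_envvar = "CLANG_CUDA_COMPILER_PATH" if use_clang else "GCC_HOST_COMPILER_PATH"
      cc_name = repository_ctx.os.environ.get(cc_path_envvar, "").strip()
      if not cc_name:
        cc_name = target_cc_name
      if cc_name.startswith("/"):
        # Absolute path: trust it without a PATH lookup.
        return cc_name
      cc = repository_ctx.which(cc_name)
      if cc == None:
        fail(("Cannot find {}, either correct your path or set the {}" +
              " environment variable").format(target_cc_name, cc_path_envvar))
      return cc
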
@@ -64,10 +74,17 @@ def _cxx_inc_convert(path):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
-
-def get_cxx_inc_directories(repository_ctx, cc):
- """Compute the list of default C++ include directories."""
- result = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"])
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+ """Compute the list of default C or C++ include directories."""
+ if lang_is_cpp:
+ lang = "c++"
+ else:
+ lang = "c"
+ # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+ # but in the cuda_clang CROSSTOOL file that is a `feature`, and we should
+ # handle the case when it is disabled and no flag is passed.
+ result = repository_ctx.execute([cc, "-no-canonical-prefixes",
+ "-E", "-x" + lang, "-", "-v"])
index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
@@ -86,6 +103,19 @@ def get_cxx_inc_directories(repository_ctx, cc):
return [repository_ctx.path(_cxx_inc_convert(p))
for p in inc_dirs.split("\n")]
+def get_cxx_inc_directories(repository_ctx, cc):
+ """Compute the list of default C and C++ include directories."""
+ # For some reason `clang -xc` sometimes returns include paths that differ
+ # from the ones returned by `clang -xc++` (e.g. a symlink vs. the directory
+ # it points to), so we run the compiler with both `-xc` and `-xc++` and
+ # merge the resulting lists.
+ includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+ includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+ includes_cpp_set = set(includes_cpp)
+ return includes_cpp + [inc for inc in includes_c
+ if inc not in includes_cpp_set]
+
+
def auto_configure_fail(msg):
"""Output failure message when auto configuration fails."""
red = "\033[0;31m"
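
The get_cxx_inc_directories wrapper above merges the two probe results by keeping the C++ directories first and appending only those C directories that are not already present, so discovery order is preserved and duplicates are dropped. A small illustration with made-up paths (the real lists come from parsing the compiler's -v output):

    # Illustrative, made-up paths only.
    includes_cpp = ["/usr/include/c++/5", "/usr/lib/llvm-3.9/include", "/usr/include"]
    includes_c = ["/usr/lib/llvm-3.9/include", "/usr/local/include", "/usr/include"]

    includes_cpp_set = set(includes_cpp)
    merged = includes_cpp + [inc for inc in includes_c
                             if inc not in includes_cpp_set]
    # merged == ["/usr/include/c++/5", "/usr/lib/llvm-3.9/include",
    #            "/usr/include", "/usr/local/include"]
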
@@ -94,7 +124,7 @@ def auto_configure_fail(msg):
# END cc_configure common functions (see TODO above).
-def _gcc_host_compiler_includes(repository_ctx, cc):
+def _host_compiler_includes(repository_ctx, cc):
"""Generates the cxx_builtin_include_directory entries for gcc inc dirs.
Args:
@@ -645,7 +675,8 @@ def _create_dummy_repository(repository_ctx):
# Set up BUILD file for cuda/.
_tpl(repository_ctx, "cuda:build_defs.bzl",
{
- "%{cuda_is_configured}": "False"
+ "%{cuda_is_configured}": "False",
+ "%{cuda_extra_copts}": "[]"
})
_tpl(repository_ctx, "cuda:BUILD",
{
@@ -730,6 +761,19 @@ def _symlink_dir(repository_ctx, src_dir, dest_dir):
for src_file in files:
repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
+def _use_cuda_clang(repository_ctx):
+ if "TF_CUDA_CLANG" in repository_ctx.os.environ:
+ enable_cuda = repository_ctx.os.environ["TF_CUDA_CLANG"].strip()
+ return enable_cuda == "1"
+ return False
+
+def _compute_cuda_extra_copts(repository_ctx, cuda_config):
+ if _use_cuda_clang(repository_ctx):
+ capability_flags = ["--cuda-gpu-arch=sm_" + cap.replace(".", "")
+                     for cap in cuda_config.compute_capabilities]
+ else:
+ # For nvcc, compute capabilities are handled by the "crosstool_wrapper_driver_is_not_gcc" wrapper.
+ capability_flags = []
+ return str(capability_flags)
def _create_cuda_repository(repository_ctx):
"""Creates the repository containing files set up to build with CUDA."""
@@ -761,7 +805,9 @@ def _create_cuda_repository(repository_ctx):
# Set up BUILD file for cuda/
_tpl(repository_ctx, "cuda:build_defs.bzl",
{
- "%{cuda_is_configured}": "True"
+ "%{cuda_is_configured}": "True",
+ "%{cuda_extra_copts}": _compute_cuda_extra_copts(repository_ctx, cuda_config),
})
_tpl(repository_ctx, "cuda:BUILD",
{
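
The cuda:build_defs.bzl template itself is not part of this diff, so the sketch below is only a guess at the kind of consumer the %{cuda_extra_copts} placeholder feeds: a helper that build rules can call to pick up the extra clang flags. The names used here (cuda_is_configured, cuda_default_copts) are assumptions for illustration, not confirmed by this change:

    # Hypothetical shape of the generated cuda/build_defs.bzl after substitution.
    def cuda_is_configured():
      # Expanded from %{cuda_is_configured}.
      return True

    def cuda_default_copts():
      # Expanded from %{cuda_extra_copts}: the stringified flag list, e.g.
      # ["--cuda-gpu-arch=sm_35", "--cuda-gpu-arch=sm_52"] for clang, [] for nvcc.
      return ["--cuda-gpu-arch=sm_35", "--cuda-gpu-arch=sm_52"]
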
@@ -787,21 +833,25 @@ def _create_cuda_repository(repository_ctx):
# Set up crosstool/
_file(repository_ctx, "crosstool:BUILD")
cc = find_cc(repository_ctx)
- gcc_host_compiler_includes = _gcc_host_compiler_includes(repository_ctx, cc)
- _tpl(repository_ctx, "crosstool:CROSSTOOL",
- {
+ host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+ cuda_defines = {
"%{cuda_include_path}": cuda_config.cuda_toolkit_path + '/include',
- "%{gcc_host_compiler_includes}": gcc_host_compiler_includes,
- })
- _tpl(repository_ctx,
- "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
- {
- "%{cpu_compiler}": str(cc),
- "%{cuda_version}": cuda_config.cuda_version,
- "%{gcc_host_compiler_path}": str(cc),
- "%{cuda_compute_capabilities}": ", ".join(
- ["\"%s\"" % c for c in cuda_config.compute_capabilities]),
- })
+ "%{host_compiler_includes}": host_compiler_includes,
+ }
+ if _use_cuda_clang(repository_ctx):
+ cuda_defines["%{clang_path}"] = cc
+ _tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL")
+ else:
+ _tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL")
+ _tpl(repository_ctx,
+ "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+ {
+ "%{cpu_compiler}": str(cc),
+ "%{cuda_version}": cuda_config.cuda_version,
+ "%{gcc_host_compiler_path}": str(cc),
+ "%{cuda_compute_capabilities}": ", ".join(
+ ["\"%s\"" % c for c in cuda_config.compute_capabilities]),
+ })
# Set up cuda_config.h, which is used by
# tensorflow/stream_executor/dso_loader.cc.
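
One detail the CROSSTOOL hunk above relies on is that _tpl accepts an explicit output path, so both the CROSSTOOL_clang and CROSSTOOL_nvcc templates can be rendered to the same crosstool/CROSSTOOL file. The helper is defined earlier in cuda_configure.bzl and is not shown in this diff; a plausible sketch of its shape, assuming it wraps repository_ctx.template:

    def _tpl(repository_ctx, tpl, substitutions={}, out=None):
      # Sketch only: expand //third_party/gpus/<tpl>.tpl into the repository,
      # defaulting the output path to the template's own repo-relative path.
      if not out:
        out = tpl.replace(":", "/")
      repository_ctx.template(
          out,
          Label("//third_party/gpus/%s.tpl" % tpl),
          substitutions)
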