Diffstat (limited to 'third_party/gpus/cuda_configure.bzl')
 third_party/gpus/cuda_configure.bzl | 108
 1 file changed, 79 insertions(+), 29 deletions(-)
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index a2b3e7d79e..bbe0442eaf 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -5,6 +5,9 @@
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+ * `TF_CUDA_CLANG`: Whether to use clang as a CUDA compiler.
+ * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
+ both host and device code compilation if TF_CUDA_CLANG is 1.
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
`/usr/local/cuda`.
* `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
@@ -17,6 +20,7 @@
"""
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
@@ -35,19 +39,25 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
# BEGIN cc_configure common functions.
def find_cc(repository_ctx):
"""Find the C++ compiler."""
- cc_name = "gcc"
- if _GCC_HOST_COMPILER_PATH in repository_ctx.os.environ:
- cc_name = repository_ctx.os.environ[_GCC_HOST_COMPILER_PATH].strip()
- if not cc_name:
- cc_name = "gcc"
+ if _use_cuda_clang(repository_ctx):
+ target_cc_name = "clang"
+ cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+ else:
+ target_cc_name = "gcc"
+ cc_path_envvar = _GCC_HOST_COMPILER_PATH
+ cc_name = target_cc_name
+
+ if cc_path_envvar in repository_ctx.os.environ:
+ cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+ if cc_name_from_env:
+ cc_name = cc_name_from_env
if cc_name.startswith("/"):
# Absolute path, maybe we should make this supported by our which function.
return cc_name
cc = repository_ctx.which(cc_name)
if cc == None:
- fail(
- "Cannot find gcc, either correct your path or set the CC" +
- " environment variable")
+ fail(("Cannot find {}, either correct your path or set the {}" +
+ " environment variable").format(target_cc_name, cc_path_envvar))
return cc
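
Taken together, the fragments above mean find_cc now resolves the compiler in a fixed order: pick the target name from TF_CUDA_CLANG, let the matching compiler-path variable override it, accept absolute paths verbatim, and otherwise fall back to repository_ctx.which. A minimal Starlark sketch of that resolution order, consolidated for readability (illustrative only; the hypothetical _resolve_compiler helper is not part of the patch):

    def _resolve_compiler(repository_ctx):
      # Hypothetical helper mirroring what find_cc does after this change.
      use_clang = repository_ctx.os.environ.get("TF_CUDA_CLANG", "").strip() == "1"
      target_cc_name = "clang" if use_clang else "gcc"
      cc_path_envvar = "CLANG_CUDA_COMPILER_PATH" if use_clang else "GCC_HOST_COMPILER_PATH"
      cc_name = repository_ctx.os.environ.get(cc_path_envvar, "").strip()
      if not cc_name:
        cc_name = target_cc_name
      if cc_name.startswith("/"):
        # Absolute path: trust it without a PATH lookup.
        return cc_name
      cc = repository_ctx.which(cc_name)
      if cc == None:
        fail(("Cannot find {}, either correct your path or set the {}" +
              " environment variable").format(target_cc_name, cc_path_envvar))
      return cc
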
@@ -64,10 +74,17 @@ def _cxx_inc_convert(path):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
-
-def get_cxx_inc_directories(repository_ctx, cc):
- """Compute the list of default C++ include directories."""
- result = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"])
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+ """Compute the list of default C or C++ include directories."""
+ if lang_is_cpp:
+ lang = "c++"
+ else:
+ lang = "c"
+ # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+ # but in the cuda_clang CROSSTOOL file that is a `feature`, and we should
+ # handle the case when it is disabled and no flag is passed.
+ result = repository_ctx.execute([cc, "-no-canonical-prefixes",
+ "-E", "-x" + lang, "-", "-v"])
index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
@@ -86,6 +103,19 @@ def get_cxx_inc_directories(repository_ctx, cc):
return [repository_ctx.path(_cxx_inc_convert(p))
for p in inc_dirs.split("\n")]
+def get_cxx_inc_directories(repository_ctx, cc):
+ """Compute the list of default C and C++ include directories."""
+ # For some reason `clang -xc` sometimes returns include paths that differ
+ # from the ones returned by `clang -xc++` (e.g. a symlink vs. the directory
+ # it points to), so we run the compiler with both `-xc` and `-xc++` and
+ # merge the resulting lists.
+ includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+ includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+ includes_cpp_set = set(includes_cpp)
+ return includes_cpp + [inc for inc in includes_c
+ if inc not in includes_cpp_set]
+
+
def auto_configure_fail(msg):
"""Output failure message when auto configuration fails."""
red = "\033[0;31m"
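
The get_cxx_inc_directories wrapper above merges the two probe results by keeping the C++ directories first and appending only those C directories that are not already present, so discovery order is preserved and duplicates are dropped. A small illustration with made-up paths (the real lists come from parsing the compiler's -v output):

    # Illustrative, made-up paths only.
    includes_cpp = ["/usr/include/c++/5", "/usr/lib/llvm-3.9/include", "/usr/include"]
    includes_c = ["/usr/lib/llvm-3.9/include", "/usr/local/include", "/usr/include"]

    includes_cpp_set = set(includes_cpp)
    merged = includes_cpp + [inc for inc in includes_c
                             if inc not in includes_cpp_set]
    # merged == ["/usr/include/c++/5", "/usr/lib/llvm-3.9/include",
    #            "/usr/include", "/usr/local/include"]
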
@@ -94,7 +124,7 @@ def auto_configure_fail(msg):
# END cc_configure common functions (see TODO above).
-def _gcc_host_compiler_includes(repository_ctx, cc):
+def _host_compiler_includes(repository_ctx, cc):
"""Generates the cxx_builtin_include_directory entries for gcc inc dirs.
Args:
@@ -645,7 +675,8 @@ def _create_dummy_repository(repository_ctx):
# Set up BUILD file for cuda/.
_tpl(repository_ctx, "cuda:build_defs.bzl",
{
- "%{cuda_is_configured}": "False"
+ "%{cuda_is_configured}": "False",
+ "%{cuda_extra_copts}": "[]"
})
_tpl(repository_ctx, "cuda:BUILD",
{
@@ -730,6 +761,19 @@ def _symlink_dir(repository_ctx, src_dir, dest_dir):
for src_file in files:
repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
+def _use_cuda_clang(repository_ctx):
+ if "TF_CUDA_CLANG" in repository_ctx.os.environ:
+ enable_cuda = repository_ctx.os.environ["TF_CUDA_CLANG"].strip()
+ return enable_cuda == "1"
+ return False
+
+def _compute_cuda_extra_copts(repository_ctx, cuda_config):
+ if _use_cuda_clang(repository_ctx):
+ capability_flags = ["--cuda-gpu-arch=sm_" + cap.replace(".", "")
+                     for cap in cuda_config.compute_capabilities]
+ else:
+ # For nvcc, compute capabilities are handled by the "crosstool_wrapper_driver_is_not_gcc" wrapper.
+ capability_flags = []
+ return str(capability_flags)
def _create_cuda_repository(repository_ctx):
"""Creates the repository containing files set up to build with CUDA."""
@@ -761,7 +805,9 @@ def _create_cuda_repository(repository_ctx):
# Set up BUILD file for cuda/
_tpl(repository_ctx, "cuda:build_defs.bzl",
{
- "%{cuda_is_configured}": "True"
+ "%{cuda_is_configured}": "True",
+ "%{cuda_extra_copts}": _compute_cuda_extra_copts(repository_ctx, cuda_config),
})
_tpl(repository_ctx, "cuda:BUILD",
{
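
The cuda:build_defs.bzl template itself is not part of this diff, so the sketch below is only a guess at the kind of consumer the %{cuda_extra_copts} placeholder feeds: a helper that build rules can call to pick up the extra clang flags. The names used here (cuda_is_configured, cuda_default_copts) are assumptions for illustration, not confirmed by this change:

    # Hypothetical shape of the generated cuda/build_defs.bzl after substitution.
    def cuda_is_configured():
      # Expanded from %{cuda_is_configured}.
      return True

    def cuda_default_copts():
      # Expanded from %{cuda_extra_copts}: the stringified flag list, e.g.
      # ["--cuda-gpu-arch=sm_35", "--cuda-gpu-arch=sm_52"] for clang, [] for nvcc.
      return ["--cuda-gpu-arch=sm_35", "--cuda-gpu-arch=sm_52"]
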
@@ -787,21 +833,25 @@ def _create_cuda_repository(repository_ctx):
# Set up crosstool/
_file(repository_ctx, "crosstool:BUILD")
cc = find_cc(repository_ctx)
- gcc_host_compiler_includes = _gcc_host_compiler_includes(repository_ctx, cc)
- _tpl(repository_ctx, "crosstool:CROSSTOOL",
- {
+ host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+ cuda_defines = {
"%{cuda_include_path}": cuda_config.cuda_toolkit_path + '/include',
- "%{gcc_host_compiler_includes}": gcc_host_compiler_includes,
- })
- _tpl(repository_ctx,
- "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
- {
- "%{cpu_compiler}": str(cc),
- "%{cuda_version}": cuda_config.cuda_version,
- "%{gcc_host_compiler_path}": str(cc),
- "%{cuda_compute_capabilities}": ", ".join(
- ["\"%s\"" % c for c in cuda_config.compute_capabilities]),
- })
+ "%{host_compiler_includes}": host_compiler_includes,
+ }
+ if _use_cuda_clang(repository_ctx):
+ cuda_defines["%{clang_path}"] = cc
+ _tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL")
+ else:
+ _tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL")
+ _tpl(repository_ctx,
+ "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+ {
+ "%{cpu_compiler}": str(cc),
+ "%{cuda_version}": cuda_config.cuda_version,
+ "%{gcc_host_compiler_path}": str(cc),
+ "%{cuda_compute_capabilities}": ", ".join(
+ ["\"%s\"" % c for c in cuda_config.compute_capabilities]),
+ })
# Set up cuda_config.h, which is used by
# tensorflow/stream_executor/dso_loader.cc.
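
One detail the CROSSTOOL hunk above relies on is that _tpl accepts an explicit output path, so both the CROSSTOOL_clang and CROSSTOOL_nvcc templates can be rendered to the same crosstool/CROSSTOOL file. The helper is defined earlier in cuda_configure.bzl and is not shown in this diff; a plausible sketch of its shape, assuming it wraps repository_ctx.template:

    def _tpl(repository_ctx, tpl, substitutions={}, out=None):
      # Sketch only: expand //third_party/gpus/<tpl>.tpl into the repository,
      # defaulting the output path to the template's own repo-relative path.
      if not out:
        out = tpl.replace(":", "/")
      repository_ctx.template(
          out,
          Label("//third_party/gpus/%s.tpl" % tpl),
          substitutions)
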