56 files changed, 3510 insertions, 1788 deletions
diff --git a/third_party/cub.BUILD b/third_party/cub.BUILD
index 29159c9dad..a04347b21e 100644
--- a/third_party/cub.BUILD
+++ b/third_party/cub.BUILD
@@ -20,6 +20,7 @@ filegroup(
 cc_library(
     name = "cub",
     hdrs = if_cuda([":cub_header_files"]),
+    include_prefix = "third_party",
     deps = [
         "@local_config_cuda//cuda:cuda_headers",
     ],
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD
index 203991b50f..f072f2545a 100644
--- a/third_party/eigen3/BUILD
+++ b/third_party/eigen3/BUILD
@@ -66,19 +66,13 @@ genrule(
     outs = ["include"],
     cmd = """
     mkdir $@
-    for f in $(locations @eigen_archive//:eigen_header_files) ; do
+    for f in $(SRCS); do
       d="$${f%/*}"
       d="$${d#*external/eigen_archive/}"
 
       mkdir -p "$@/$${d}"
       cp "$${f}" "$@/$${d}/"
     done
-
-    for f in $(locations :eigen_third_party_header_files) ; do
-      d="$${f%/*}"
-
-      mkdir -p "$@/$${d}"
-      cp "$${f}" "$@/$${d}/"
-    done
     """,
+    tags = ["manual"],
 )
diff --git a/third_party/flatbuffers/BUILD.bazel b/third_party/flatbuffers/BUILD.bazel
index 934c0d9650..d0be482fda 100644
--- a/third_party/flatbuffers/BUILD.bazel
+++ b/third_party/flatbuffers/BUILD.bazel
@@ -108,11 +108,14 @@ cc_binary(
         "grpc/src/compiler/schema_interface.h",
         "src/flatc_main.cpp",
         "src/idl_gen_cpp.cpp",
+        "src/idl_gen_dart.cpp",
         "src/idl_gen_general.cpp",
         "src/idl_gen_go.cpp",
         "src/idl_gen_grpc.cpp",
         "src/idl_gen_js.cpp",
         "src/idl_gen_json_schema.cpp",
+        "src/idl_gen_lobster.cpp",
+        "src/idl_gen_lua.cpp",
         "src/idl_gen_php.cpp",
         "src/idl_gen_python.cpp",
         "src/idl_gen_text.cpp",
diff --git a/third_party/flatbuffers/workspace.bzl b/third_party/flatbuffers/workspace.bzl
index 3aeef96a72..7613767fc4 100644
--- a/third_party/flatbuffers/workspace.bzl
+++ b/third_party/flatbuffers/workspace.bzl
@@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive")
 def repo():
     third_party_http_archive(
         name = "flatbuffers",
-        strip_prefix = "flatbuffers-1.9.0",
-        sha256 = "5ca5491e4260cacae30f1a5786d109230db3f3a6e5a0eb45d0d0608293d247e3",
+        strip_prefix = "flatbuffers-1f5eae5d6a135ff6811724f6c57f911d1f46bb15",
+        sha256 = "b2bb0311ca40b12ebe36671bdda350b10c7728caf0cfe2d432ea3b6e409016f3",
         urls = [
-            "https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar.gz",
-            "https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz",
+            "https://mirror.bazel.build/github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz",
+            "https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz",
         ],
         build_file = "//third_party/flatbuffers:BUILD.bazel",
         system_build_file = "//third_party/flatbuffers:BUILD.system",
diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index f638756d23..c8812fab33 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -2,6 +2,20 @@ licenses(["restricted"])
 
 package(default_visibility = ["//visibility:public"])
 
+toolchain(
+    name = "toolchain-linux-x86_64",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = ":cc-compiler-local",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
 cc_toolchain_suite(
     name = "toolchain",
     toolchains = {
diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
new file mode 100644
index 0000000000..0e175b3ef6
--- /dev/null
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@@ -0,0 +1,158 @@
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+  cpu: "k8"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "piii"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "arm"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "ppc"
+  toolchain_identifier: "local_linux"
+}
+
+toolchain {
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "local"
+  needsPic: true
+  supports_gold_linker: false
+  supports_incremental_linker: false
+  supports_fission: false
+  supports_interface_shared_objects: false
+  supports_normalizing_ar: false
+  supports_start_end_lib: false
+  supports_thin_archives: false
+  target_libc: "local"
+  target_cpu: "local"
+  target_system_name: "local"
+  toolchain_identifier: "local_linux"
+
+  tool_path { name: "ar" path: "/usr/bin/ar" }
+  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+  tool_path { name: "cpp" path: "/usr/bin/cpp" }
+  tool_path { name: "dwp" path: "/usr/bin/dwp" }
+  # As part of the TensorFlow release, we place some ROCm-related compilation
+  # files in @local_config_rocm//crosstool/clang/bin, and this relative
+  # path, combined with the rest of our Bazel configuration causes our
+  # compilation to use those files.
+  tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
+  # Use "-std=c++11" for hipcc. For consistency, force both the host compiler
+  # and the device compiler to use "-std=c++11".
+  cxx_flag: "-std=c++11"
+  linker_flag: "-Wl,-no-as-needed"
+  linker_flag: "-lstdc++"
+  #linker_flag: "-B/usr/bin/"
+  linker_flag: "-B/opt/rocm/hcc/compiler/bin"
+
+%{host_compiler_includes}
+  tool_path { name: "gcov" path: "/usr/bin/gcov" }
+
+  # C(++) compiles invoke the compiler (as that is the one knowing where
+  # to find libraries), but we provide LD so other rules can invoke the linker.
+  tool_path { name: "ld" path: "/usr/bin/ld" }
+
+  tool_path { name: "nm" path: "/usr/bin/nm" }
+  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+  objcopy_embed_flag: "-I"
+  objcopy_embed_flag: "binary"
+  tool_path { name: "objdump" path: "/usr/bin/objdump" }
+  tool_path { name: "strip" path: "/usr/bin/strip" }
+
+  # Anticipated future default.
+  unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+  # Make C++ compilation deterministic. Use linkstamping instead of these
+  # compiler symbols.
+  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+  unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
+  # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
+  # It needs to be always set when compiling Eigen headers
+  # (irrespective of whether the source file is being compiled via HIPCC)
+  # so adding -DEIGEN_USE_HIP as a default CXX flag here
+  unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
+
+    
+  # Security hardening on by default.
+  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+  # We need to undef it before redefining it as some distributions now have
+  # it enabled by default.
+  #compiler_flag: "-U_FORTIFY_SOURCE"
+  #compiler_flag: "-D_FORTIFY_SOURCE=1"
+  #compiler_flag: "-fstack-protector"
+  #compiler_flag: "-fPIE"
+  #linker_flag: "-pie"
+  #linker_flag: "-Wl,-z,relro,-z,now"
+
+  # Enable coloring even if there's no attached terminal. Bazel removes the
+  # escape sequences if --nocolor is specified. This isn't supported by gcc
+  # on Ubuntu 14.04.
+  # compiler_flag: "-fcolor-diagnostics"
+
+  # All warnings are enabled. Maybe enable -Werror as well?
+  compiler_flag: "-Wall"
+  # Enable a few more warnings that aren't part of -Wall.
+  compiler_flag: "-Wunused-but-set-parameter"
+  # But disable some that are problematic.
+  compiler_flag: "-Wno-free-nonheap-object" # has false positives
+
+  # Keep stack frames for debugging, even in opt mode.
+  compiler_flag: "-fno-omit-frame-pointer"
+
+  # Anticipated future default.
+  linker_flag: "-no-canonical-prefixes"
+  unfiltered_cxx_flag: "-fno-canonical-system-headers"
+  # Have gcc return the exit code from ld.
+  linker_flag: "-pass-exit-codes"
+  # Stamp the binary with a unique identifier.
+  linker_flag: "-Wl,--build-id=md5"
+  linker_flag: "-Wl,--hash-style=gnu"
+  # Gold linker only? Can we enable this by default?
+  # linker_flag: "-Wl,--warn-execstack"
+  # linker_flag: "-Wl,--detect-odr-violations"
+
+  # Include directory for ROCm headers.
+%{rocm_include_path}
+
+  compilation_mode_flags {
+    mode: DBG
+    # Enable debug symbols.
+    compiler_flag: "-g"
+  }
+  compilation_mode_flags {
+    mode: OPT
+
+    # No debug symbols.
+    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+    # even generally? However, that can't happen here, as it requires special
+    # handling in Bazel.
+    compiler_flag: "-g0"
+
+    # Conservative choice for -O
+    # -O3 can increase binary size and even slow down the resulting binaries.
+    # Profile first and / or use FDO if you need better performance than this.
+    compiler_flag: "-O2"
+
+    # Disable assertions
+    compiler_flag: "-DNDEBUG"
+
+    # Removal of unused code and data at link time (can this increase binary size in some cases?).
+    compiler_flag: "-ffunction-sections"
+    compiler_flag: "-fdata-sections"
+    linker_flag: "-Wl,--gc-sections"
+  }
+  linking_mode_flags { mode: DYNAMIC }
+}
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
new file mode 100755
index 0000000000..824238022b
--- /dev/null
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+"""Crosstool wrapper for compiling ROCm programs.
+
+SYNOPSIS:
+  crosstool_wrapper_driver_rocm [options passed in by cc_library()
+                                or cc_binary() rule]
+
+DESCRIPTION:
+  This script is expected to be called by the cc_library() or cc_binary() bazel
+  rules. When the option "-x rocm" is present in the list of arguments passed
+  to this script, it invokes the hipcc compiler. Most arguments are passed
+  as is as a string to --compiler-options of hipcc. When "-x rocm" is not
+  present, this wrapper invokes gcc with the input arguments as is.
+"""
+
+from __future__ import print_function
+
+__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by rocm_configure.bzl.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
+
+HIPCC_PATH = '%{hipcc_path}'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+  print('gpus/crosstool: {0}'.format(s))
+
+
+def GetOptionValue(argv, option):
+  """Extract the list of values for option from the argv list.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    option: The option whose value to extract, without the leading '-'.
+
+  Returns:
+    A list of values, either directly following the option,
+    (eg., -opt val1 val2) or values collected from multiple occurrences of
+    the option (eg., -opt val1 -opt val2).
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-' + option, nargs='*', action='append')
+  args, _ = parser.parse_known_args(argv)
+  if not args or not vars(args)[option]:
+    return []
+  else:
+    return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+  """Collect the -isystem, -iquote, and --sysroot option values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be used as the --compiler-options to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-isystem', nargs='*', action='append')
+  parser.add_argument('-iquote', nargs='*', action='append')
+  parser.add_argument('--sysroot', nargs=1)
+  parser.add_argument('-g', nargs='*', action='append')
+  parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+  args, _ = parser.parse_known_args(argv)
+
+  opts = ''
+
+  if args.isystem:
+    opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+  if args.iquote:
+    opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+  if args.g:
+    opts += ' -g' + ' -g'.join(sum(args.g, []))
+  #if args.fno_canonical_system_headers:
+  #  opts += ' -fno-canonical-system-headers'
+  if args.sysroot:
+    opts += ' --sysroot ' + args.sysroot[0]
+
+  return opts
+
+def GetHipccOptions(argv):
+  """Collect the -hipcc_options values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be passed directly to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+  args, _ = parser.parse_known_args(argv)
+
+  if args.hipcc_options:
+    options = _update_options(sum(args.hipcc_options, []))
+    return ' '.join(['--'+a for a in options])
+  return ''
+
+
+def InvokeHipcc(argv, log=False):
+  """Call hipcc with arguments assembled from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    log: True if logging is requested.
+
+  Returns:
+    The return value of calling os.system('hipcc ' + args)
+  """
+
+  host_compiler_options = GetHostCompilerOptions(argv)
+  hipcc_compiler_options = GetHipccOptions(argv)
+  opt_option = GetOptionValue(argv, 'O')
+  m_options = GetOptionValue(argv, 'm')
+  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+  include_options = GetOptionValue(argv, 'I')
+  out_file = GetOptionValue(argv, 'o')
+  depfiles = GetOptionValue(argv, 'MF')
+  defines = GetOptionValue(argv, 'D')
+  defines = ''.join([' -D' + define for define in defines])
+  undefines = GetOptionValue(argv, 'U')
+  undefines = ''.join([' -U' + define for define in undefines])
+  std_options = GetOptionValue(argv, 'std')
+  hipcc_allowed_std_options = ["c++11"]
+  std_options = ''.join([' -std=' + define
+      for define in std_options if define in hipcc_allowed_std_options])
+
+  # The list of source files get passed after the -c option. I don't know of
+  # any other reliable way to just get the list of source files to be compiled.
+  src_files = GetOptionValue(argv, 'c')
+
+  if len(src_files) == 0:
+    return 1
+  if len(out_file) != 1:
+    return 1
+
+  opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+         else ' -g')
+
+  includes = (' -I ' + ' -I '.join(include_options)
+              if len(include_options) > 0
+              else '')
+
+  # Unfortunately, there are other options that have -c prefix too.
+  # So allowing only those look like C/C++ files.
+  src_files = [f for f in src_files if
+               re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+  srcs = ' '.join(src_files)
+  out = ' -o ' + out_file[0]
+
+  hipccopts = ' '
+  hipccopts += ' ' + hipcc_compiler_options
+  hipccopts += undefines
+  hipccopts += defines
+  hipccopts += std_options
+  hipccopts += m_options
+
+  if depfiles:
+    # Generate the dependency file
+    depfile = depfiles[0]
+    cmd = (HIPCC_PATH + ' ' + hipccopts +
+           host_compiler_options +
+           ' ' + GCC_HOST_COMPILER_PATH +
+           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+    if log: Log(cmd)
+    exit_status = os.system(cmd)
+    if exit_status != 0:
+      return exit_status
+
+  cmd = (HIPCC_PATH + ' ' + hipccopts +
+         host_compiler_options + ' -fPIC' +
+         ' ' + GCC_HOST_COMPILER_PATH +
+         ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+  # Need to investigate and fix.
+  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+  if log: Log(cmd)
+  return os.system(cmd)
+
+
+def main():
+  # ignore PWD env var
+  os.environ['PWD']=''
+
+  parser = ArgumentParser()
+  parser.add_argument('-x', nargs=1)
+  parser.add_argument('--rocm_log', action='store_true')
+  parser.add_argument('-pass-exit-codes', action='store_true')
+  args, leftover = parser.parse_known_args(sys.argv[1:])
+
+  if args.x and args.x[0] == 'rocm':
+    if args.rocm_log: Log('-x rocm')
+    leftover = [pipes.quote(s) for s in leftover]
+    if args.rocm_log: Log('using hipcc')
+    return InvokeHipcc(leftover, log=args.rocm_log)
+
+  # XXX use hipcc to link
+  if args.pass_exit_codes:
+    gpu_compiler_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('-pass-exit-codes'))]
+
+    # special handling for $ORIGIN
+    # - guard every argument with ''
+    modified_gpu_compiler_flags = []
+    for flag in gpu_compiler_flags:
+      modified_gpu_compiler_flags.append("'" + flag + "'")
+
+    if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+    return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
+
+  # Strip our flags before passing through to the CPU compiler for files which
+  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+  # We not only want to pass -x to the CPU compiler, but also keep it in its
+  # relative location in the argv list (the compiler is actually sensitive to
+  # this).
+  cpu_compiler_flags = [flag for flag in sys.argv[1:]
+                             if not flag.startswith(('--rocm_log'))]
+
+  # XXX: SE codes need to be built with gcc, but need this macro defined
+  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+
+  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 5648b1525a..831a3067b2 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -48,6 +48,7 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
 CUDA_LIB_PATHS = [
     "lib64/",
     "lib64/stubs/",
+    "lib/powerpc64le-linux-gnu/",
     "lib/x86_64-linux-gnu/",
     "lib/x64/",
     "lib/",
@@ -70,6 +71,7 @@ CUPTI_HEADER_PATHS = [
 # the other CUDA libraries but rather in a special extras/CUPTI directory.
 CUPTI_LIB_PATHS = [
     "extras/CUPTI/lib64/",
+    "lib/powerpc64le-linux-gnu/",
     "lib/x86_64-linux-gnu/",
     "lib64/",
     "extras/CUPTI/libx64/",
@@ -124,118 +126,141 @@ load(
 )
 
 def _get_python_bin(repository_ctx):
-    """Gets the python bin path."""
-    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-    if python_bin != None:
-        return python_bin
-    python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
-    python_bin_path = repository_ctx.which(python_bin_name)
-    if python_bin_path != None:
-        return str(python_bin_path)
-    auto_configure_fail("Cannot find python in PATH, please make sure " +
-                        "python is installed and add its directory in PATH, or --define " +
-                        "%s='/something/else'.\nPATH=%s" % (
-                            _PYTHON_BIN_PATH,
-                            repository_ctx.os.environ.get("PATH", ""),
-                        ))
+  """Gets the python bin path."""
+  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+  if python_bin != None:
+    return python_bin
+  python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
+  python_bin_path = repository_ctx.which(python_bin_name)
+  if python_bin_path != None:
+    return str(python_bin_path)
+  auto_configure_fail(
+      "Cannot find python in PATH, please make sure " +
+      "python is installed and add its directory in PATH, or --define " +
+      "%s='/something/else'.\nPATH=%s" % (
+          _PYTHON_BIN_PATH,
+          repository_ctx.os.environ.get("PATH", ""),
+      ))
+
 
 def _get_nvcc_tmp_dir_for_windows(repository_ctx):
-    """Return the tmp directory for nvcc to generate intermediate source files."""
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
-    return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
+  """Return the tmp directory for nvcc to generate intermediate source files."""
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+  return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
 
-def _get_msvc_compiler(repository_ctx):
-    vc_path = find_vc_path(repository_ctx)
-    return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-def _get_win_cuda_defines(repository_ctx):
-    """Return CROSSTOOL defines for Windows"""
-
-    # If we are not on Windows, return empty vaules for Windows specific fields.
-    # This ensures the CROSSTOOL file parser is happy.
-    if not _is_windows(repository_ctx):
-        return {
-            "%{msvc_env_tmp}": "",
-            "%{msvc_env_path}": "",
-            "%{msvc_env_include}": "",
-            "%{msvc_env_lib}": "",
-            "%{msvc_cl_path}": "",
-            "%{msvc_ml_path}": "",
-            "%{msvc_link_path}": "",
-            "%{msvc_lib_path}": "",
-            "%{cxx_builtin_include_directory}": "",
-        }
-
-    vc_path = find_vc_path(repository_ctx)
-    if not vc_path:
-        auto_configure_fail("Visual C++ build tools not found on your machine." +
-                            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using")
-        return {}
-
-    env = setup_vc_env_vars(repository_ctx, vc_path)
-    escaped_paths = escape_string(env["PATH"])
-    escaped_include_paths = escape_string(env["INCLUDE"])
-    escaped_lib_paths = escape_string(env["LIB"])
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
+def _get_msvc_compiler(repository_ctx):
+  vc_path = find_vc_path(repository_ctx)
+  return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-    msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
-    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace("\\", "/")
-    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace("\\", "/")
-    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace("\\", "/")
 
-    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
-    # The generated files are guranteed to have unique name, so they can share the same tmp directory
-    escaped_cxx_include_directories = ["cxx_builtin_include_directory: \"%s\"" % _get_nvcc_tmp_dir_for_windows(repository_ctx)]
-    for path in escaped_include_paths.split(";"):
-        if path:
-            escaped_cxx_include_directories.append("cxx_builtin_include_directory: \"%s\"" % path)
+def _get_win_cuda_defines(repository_ctx):
+  """Return CROSSTOOL defines for Windows"""
 
+  # If we are not on Windows, return empty vaules for Windows specific fields.
+  # This ensures the CROSSTOOL file parser is happy.
+  if not _is_windows(repository_ctx):
     return {
-        "%{msvc_env_tmp}": escaped_tmp_dir,
-        "%{msvc_env_path}": escaped_paths,
-        "%{msvc_env_include}": escaped_include_paths,
-        "%{msvc_env_lib}": escaped_lib_paths,
-        "%{msvc_cl_path}": msvc_cl_path,
-        "%{msvc_ml_path}": msvc_ml_path,
-        "%{msvc_link_path}": msvc_link_path,
-        "%{msvc_lib_path}": msvc_lib_path,
-        "%{cxx_builtin_include_directory}": "\n".join(escaped_cxx_include_directories),
+        "%{msvc_env_tmp}": "",
+        "%{msvc_env_path}": "",
+        "%{msvc_env_include}": "",
+        "%{msvc_env_lib}": "",
+        "%{msvc_cl_path}": "",
+        "%{msvc_ml_path}": "",
+        "%{msvc_link_path}": "",
+        "%{msvc_lib_path}": "",
+        "%{cxx_builtin_include_directory}": "",
     }
 
+  vc_path = find_vc_path(repository_ctx)
+  if not vc_path:
+    auto_configure_fail(
+        "Visual C++ build tools not found on your machine." +
+        "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using"
+    )
+    return {}
+
+  env = setup_vc_env_vars(repository_ctx, vc_path)
+  escaped_paths = escape_string(env["PATH"])
+  escaped_include_paths = escape_string(env["INCLUDE"])
+  escaped_lib_paths = escape_string(env["LIB"])
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+
+  msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
+  msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
+      "\\", "/")
+  msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
+      "\\", "/")
+  msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
+      "\\", "/")
+
+  # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
+  # The generated files are guranteed to have unique name, so they can share the same tmp directory
+  escaped_cxx_include_directories = [
+      "cxx_builtin_include_directory: \"%s\"" %
+      _get_nvcc_tmp_dir_for_windows(repository_ctx)
+  ]
+  for path in escaped_include_paths.split(";"):
+    if path:
+      escaped_cxx_include_directories.append(
+          "cxx_builtin_include_directory: \"%s\"" % path)
+
+  return {
+      "%{msvc_env_tmp}":
+          escaped_tmp_dir,
+      "%{msvc_env_path}":
+          escaped_paths,
+      "%{msvc_env_include}":
+          escaped_include_paths,
+      "%{msvc_env_lib}":
+          escaped_lib_paths,
+      "%{msvc_cl_path}":
+          msvc_cl_path,
+      "%{msvc_ml_path}":
+          msvc_ml_path,
+      "%{msvc_link_path}":
+          msvc_link_path,
+      "%{msvc_lib_path}":
+          msvc_lib_path,
+      "%{cxx_builtin_include_directory}":
+          "\n".join(escaped_cxx_include_directories),
+  }
+
 # TODO(dzc): Once these functions have been factored out of Bazel's
 # cc_configure.bzl, load them from @bazel_tools instead.
 # BEGIN cc_configure common functions.
 def find_cc(repository_ctx):
-    """Find the C++ compiler."""
-    if _is_windows(repository_ctx):
-        return _get_msvc_compiler(repository_ctx)
-
-    if _use_cuda_clang(repository_ctx):
-        target_cc_name = "clang"
-        cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
-        if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
-            return "extra_tools/bin/clang"
-    else:
-        target_cc_name = "gcc"
-        cc_path_envvar = _GCC_HOST_COMPILER_PATH
-    cc_name = target_cc_name
-
-    if cc_path_envvar in repository_ctx.os.environ:
-        cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
-        if cc_name_from_env:
-            cc_name = cc_name_from_env
-    if cc_name.startswith("/"):
-        # Absolute path, maybe we should make this supported by our which function.
-        return cc_name
-    cc = repository_ctx.which(cc_name)
-    if cc == None:
-        fail(("Cannot find {}, either correct your path or set the {}" +
-              " environment variable").format(target_cc_name, cc_path_envvar))
-    return cc
+  """Find the C++ compiler."""
+  if _is_windows(repository_ctx):
+    return _get_msvc_compiler(repository_ctx)
+
+  if _use_cuda_clang(repository_ctx):
+    target_cc_name = "clang"
+    cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+    if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
+      return "extra_tools/bin/clang"
+  else:
+    target_cc_name = "gcc"
+    cc_path_envvar = _GCC_HOST_COMPILER_PATH
+  cc_name = target_cc_name
+
+  if cc_path_envvar in repository_ctx.os.environ:
+    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+    if cc_name_from_env:
+      cc_name = cc_name_from_env
+  if cc_name.startswith("/"):
+    # Absolute path, maybe we should make this supported by our which function.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(target_cc_name, cc_path_envvar))
+  return cc
+
 
 _INC_DIR_MARKER_BEGIN = "#include <...>"
 
@@ -244,80 +269,82 @@ _OSX_FRAMEWORK_SUFFIX = " (framework directory)"
 _OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
 
 def _cxx_inc_convert(path):
-    """Convert path returned by cc -E xc++ in a complete path."""
-    path = path.strip()
-    if path.endswith(_OSX_FRAMEWORK_SUFFIX):
-        path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
-    return path
+  """Convert path returned by cc -E xc++ in a complete path."""
+  path = path.strip()
+  if path.endswith(_OSX_FRAMEWORK_SUFFIX):
+    path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
+  return path
+
 
 def _normalize_include_path(repository_ctx, path):
-    """Normalizes include paths before writing them to the crosstool.
+  """Normalizes include paths before writing them to the crosstool.
 
     If path points inside the 'crosstool' folder of the repository, a relative
     path is returned.
     If path points outside the 'crosstool' folder, an absolute path is returned.
     """
-    path = str(repository_ctx.path(path))
-    crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+  path = str(repository_ctx.path(path))
+  crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+
+  if path.startswith(crosstool_folder):
+    # We drop the path to "$REPO/crosstool" and a trailing path separator.
+    return path[len(crosstool_folder) + 1:]
+  return path
 
-    if path.startswith(crosstool_folder):
-        # We drop the path to "$REPO/crosstool" and a trailing path separator.
-        return path[len(crosstool_folder) + 1:]
-    return path
 
 def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
-    """Compute the list of default C or C++ include directories."""
-    if lang_is_cpp:
-        lang = "c++"
-    else:
-        lang = "c"
-    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
-    index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
-    if index1 == -1:
-        return []
-    index1 = result.stderr.find("\n", index1)
-    if index1 == -1:
-        return []
-    index2 = result.stderr.rfind("\n ")
-    if index2 == -1 or index2 < index1:
-        return []
-    index2 = result.stderr.find("\n", index2 + 1)
-    if index2 == -1:
-        inc_dirs = result.stderr[index1 + 1:]
-    else:
-        inc_dirs = result.stderr[index1 + 1:index2].strip()
+  """Compute the list of default C or C++ include directories."""
+  if lang_is_cpp:
+    lang = "c++"
+  else:
+    lang = "c"
+  result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [
+      _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
+      for p in inc_dirs.split("\n")
+  ]
 
-    return [
-        _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
-        for p in inc_dirs.split("\n")
-    ]
 
 def get_cxx_inc_directories(repository_ctx, cc):
-    """Compute the list of default C and C++ include directories."""
-
-    # For some reason `clang -xc` sometimes returns include paths that are
-    # different from the ones from `clang -xc++`. (Symlink and a dir)
-    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
-    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
-    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
-
-    includes_cpp_set = depset(includes_cpp)
-    return includes_cpp + [
-        inc
-        for inc in includes_c
-        if inc not in includes_cpp_set
-    ]
+  """Compute the list of default C and C++ include directories."""
+
+  # For some reason `clang -xc` sometimes returns include paths that are
+  # different from the ones from `clang -xc++`. (Symlink and a dir)
+  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+  includes_cpp_set = depset(includes_cpp)
+  return includes_cpp + [
+      inc for inc in includes_c if inc not in includes_cpp_set
+  ]
+
 
 def auto_configure_fail(msg):
-    """Output failure message when cuda configuration fails."""
-    red = "\033[0;31m"
-    no_color = "\033[0m"
-    fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
+  """Output failure message when cuda configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
 
 # END cc_configure common functions (see TODO above).
 
 def _host_compiler_includes(repository_ctx, cc):
-    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -328,14 +355,15 @@ def _host_compiler_includes(repository_ctx, cc):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
-    inc_entries = []
-    for inc_dir in inc_dirs:
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
-    return "\n".join(inc_entries)
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
 
 def _cuda_include_path(repository_ctx, cuda_config):
-    """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
+  """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -346,39 +374,41 @@ def _cuda_include_path(repository_ctx, cuda_config):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_config.cuda_toolkit_path,
-                                        ".exe" if cuda_config.cpu_value == "Windows" else "",
-                                    ))
-    result = repository_ctx.execute([
-        nvcc_path,
-        "-v",
-        "/dev/null",
-        "-o",
-        "/dev/null",
-    ])
-    target_dir = ""
-    for one_line in result.stderr.splitlines():
-        if one_line.startswith("#$ _TARGET_DIR_="):
-            target_dir = (cuda_config.cuda_toolkit_path + "/" +
-                          one_line.replace("#$ _TARGET_DIR_=", "") + "/include")
-    inc_entries = []
-    if target_dir != "":
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
-    default_include = cuda_config.cuda_toolkit_path + "/include"
-    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" %
-                       default_include)
-    return "\n".join(inc_entries)
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_config.cuda_toolkit_path,
+      ".exe" if cuda_config.cpu_value == "Windows" else "",
+  ))
+  result = repository_ctx.execute([
+      nvcc_path,
+      "-v",
+      "/dev/null",
+      "-o",
+      "/dev/null",
+  ])
+  target_dir = ""
+  for one_line in result.stderr.splitlines():
+    if one_line.startswith("#$ _TARGET_DIR_="):
+      target_dir = (
+          cuda_config.cuda_toolkit_path + "/" + one_line.replace(
+              "#$ _TARGET_DIR_=", "") + "/include")
+  inc_entries = []
+  if target_dir != "":
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
+  default_include = cuda_config.cuda_toolkit_path + "/include"
+  inc_entries.append(
+      "  cxx_builtin_include_directory: \"%s\"" % default_include)
+  return "\n".join(inc_entries)
+
 
 def _enable_cuda(repository_ctx):
-    if "TF_NEED_CUDA" in repository_ctx.os.environ:
-        enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
-        return enable_cuda == "1"
-    return False
+  if "TF_NEED_CUDA" in repository_ctx.os.environ:
+    enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
+    return enable_cuda == "1"
+  return False
+
 
-def _cuda_toolkit_path(repository_ctx):
-    """Finds the cuda toolkit directory.
+def cuda_toolkit_path(repository_ctx):
+  """Finds the cuda toolkit directory.
 
     Args:
       repository_ctx: The repository context.
@@ -386,27 +416,31 @@ def _cuda_toolkit_path(repository_ctx):
     Returns:
       A speculative real path of the cuda toolkit install directory.
     """
-    cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
-    if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
-        cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
-    if not repository_ctx.path(cuda_toolkit_path).exists:
-        auto_configure_fail("Cannot find cuda toolkit path.")
-    return str(repository_ctx.path(cuda_toolkit_path).realpath)
+  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
+  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
+    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(cuda_toolkit_path).exists:
+    auto_configure_fail("Cannot find cuda toolkit path.")
+  return str(repository_ctx.path(cuda_toolkit_path).realpath)
+
 
 def _cudnn_install_basedir(repository_ctx):
-    """Finds the cudnn install directory."""
-    cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
-    if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
-        cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
-    if not repository_ctx.path(cudnn_install_path).exists:
-        auto_configure_fail("Cannot find cudnn install path.")
-    return cudnn_install_path
+  """Finds the cudnn install directory."""
+  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
+  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
+    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
+  if not repository_ctx.path(cudnn_install_path).exists:
+    auto_configure_fail("Cannot find cudnn install path.")
+  return cudnn_install_path
+
 
 def matches_version(environ_version, detected_version):
-    """Checks whether the user-specified version matches the detected version.
+  """Checks whether the user-specified version matches the detected version.
 
-    This function performs a weak matching so that if the user specifies only the
-    major or major and minor versions, the versions are still considered matching
+    This function performs a weak matching so that if the user specifies only
+    the
+    major or major and minor versions, the versions are still considered
+    matching
     if the version parts match. To illustrate:
 
         environ_version  detected_version  result
@@ -422,25 +456,25 @@ def matches_version(environ_version, detected_version):
         variables.
       detected_version: The version autodetected from the CUDA installation on
         the system.
-
     Returns: True if user-specified version matches detected version and False
       otherwise.
-    """
-    environ_version_parts = environ_version.split(".")
-    detected_version_parts = detected_version.split(".")
-    if len(detected_version_parts) < len(environ_version_parts):
-        return False
-    for i, part in enumerate(detected_version_parts):
-        if i >= len(environ_version_parts):
-            break
-        if part != environ_version_parts[i]:
-            return False
-    return True
+  """
+  environ_version_parts = environ_version.split(".")
+  detected_version_parts = detected_version.split(".")
+  if len(detected_version_parts) < len(environ_version_parts):
+    return False
+  for i, part in enumerate(detected_version_parts):
+    if i >= len(environ_version_parts):
+      break
+    if part != environ_version_parts[i]:
+      return False
+  return True
+
 
 _NVCC_VERSION_PREFIX = "Cuda compilation tools, release "
 
 def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
-    """Detects the version of CUDA installed on the system.
+  """Detects the version of CUDA installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -450,64 +484,61 @@ def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
       String containing the version of CUDA.
     """
 
-    # Run nvcc --version and find the line containing the CUDA version.
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_toolkit_path,
-                                        ".exe" if cpu_value == "Windows" else "",
-                                    ))
-    if not nvcc_path.exists:
-        auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
-    result = repository_ctx.execute([str(nvcc_path), "--version"])
-    if result.stderr:
-        auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
-    lines = result.stdout.splitlines()
-    version_line = lines[len(lines) - 1]
-    if version_line.find(_NVCC_VERSION_PREFIX) == -1:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-
-    # Parse the CUDA version from the line containing the CUDA version.
-    prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
-    parts = prefix_removed.split(",")
-    if len(parts) != 2 or len(parts[0]) < 2:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-    full_version = parts[1].strip()
-    if full_version.startswith("V"):
-        full_version = full_version[1:]
-
-    # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDA_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        auto_configure_fail(
-            ("CUDA version detected from nvcc (%s) does not match " +
-             "TF_CUDA_VERSION (%s)") % (full_version, environ_version),
-        )
-
-    # We only use the version consisting of the major and minor version numbers.
-    version_parts = full_version.split(".")
-    if len(version_parts) < 2:
-        auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
-    if cpu_value == "Windows":
-        version = "64_%s%s" % (version_parts[0], version_parts[1])
-    else:
-        version = "%s.%s" % (version_parts[0], version_parts[1])
-    return version
+  # Run nvcc --version and find the line containing the CUDA version.
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_toolkit_path,
+      ".exe" if cpu_value == "Windows" else "",
+  ))
+  if not nvcc_path.exists:
+    auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
+  result = repository_ctx.execute([str(nvcc_path), "--version"])
+  if result.stderr:
+    auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
+  lines = result.stdout.splitlines()
+  version_line = lines[len(lines) - 1]
+  if version_line.find(_NVCC_VERSION_PREFIX) == -1:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+
+  # Parse the CUDA version from the line containing the CUDA version.
+  prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
+  parts = prefix_removed.split(",")
+  if len(parts) != 2 or len(parts[0]) < 2:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+  full_version = parts[1].strip()
+  if full_version.startswith("V"):
+    full_version = full_version[1:]
+
+  # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDA_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    auto_configure_fail(
+        ("CUDA version detected from nvcc (%s) does not match " +
+         "TF_CUDA_VERSION (%s)") % (full_version, environ_version),)
+
+  # We only use the version consisting of the major and minor version numbers.
+  version_parts = full_version.split(".")
+  if len(version_parts) < 2:
+    auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
+  if cpu_value == "Windows":
+    version = "64_%s%s" % (version_parts[0], version_parts[1])
+  else:
+    version = "%s.%s" % (version_parts[0], version_parts[1])
+  return version
+
 
 _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
 _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR"
 _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL"
 
 def find_cuda_define(repository_ctx, header_dir, header_file, define):
-    """Returns the value of a #define in a header file.
+  """Returns the value of a #define in a header file.
 
     Greps through a header file and returns the value of the specified #define.
     If the #define is not found, then raise an error.
@@ -522,52 +553,52 @@ def find_cuda_define(repository_ctx, header_dir, header_file, define):
       The value of the #define found in the header.
     """
 
-    # Confirm location of the header and grep for the line defining the macro.
-    h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
-    if not h_path.exists:
-        auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
-    result = repository_ctx.execute(
-        # Grep one more lines as some #defines are splitted into two lines.
-        ["grep", "--color=never", "-A1", "-E", define, str(h_path)],
-    )
-    if result.stderr:
-        auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
-
-    # Parse the version from the line defining the macro.
-    if result.stdout.find(define) == -1:
-        auto_configure_fail("Cannot find line containing '%s' in %s" %
-                            (define, h_path))
-
-    # Split results to lines
-    lines = result.stdout.split("\n")
-    num_lines = len(lines)
-    for l in range(num_lines):
-        line = lines[l]
-        if define in line:  # Find the line with define
-            version = line
-            if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
-                version = version[:-1] + lines[l + 1]
-            break
-
-    # Remove any comments
-    version = version.split("//")[0]
-
-    # Remove define name
-    version = version.replace(define, "").strip()
-
-    # Remove the code after the version number.
-    version_end = version.find(" ")
-    if version_end != -1:
-        if version_end == 0:
-            auto_configure_fail(
-                "Cannot extract the version from line containing '%s' in %s" %
-                (define, str(h_path)),
-            )
-        version = version[:version_end].strip()
-    return version
+  # Confirm location of the header and grep for the line defining the macro.
+  h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
+  if not h_path.exists:
+    auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
+  result = repository_ctx.execute(
+      # Grep one more lines as some #defines are splitted into two lines.
+      ["grep", "--color=never", "-A1", "-E", define,
+       str(h_path)],)
+  if result.stderr:
+    auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
+
+  # Parse the version from the line defining the macro.
+  if result.stdout.find(define) == -1:
+    auto_configure_fail(
+        "Cannot find line containing '%s' in %s" % (define, h_path))
+
+  # Split results to lines
+  lines = result.stdout.split("\n")
+  num_lines = len(lines)
+  for l in range(num_lines):
+    line = lines[l]
+    if define in line:  # Find the line with define
+      version = line
+      if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
+        version = version[:-1] + lines[l + 1]
+      break
+
+  # Remove any comments
+  version = version.split("//")[0]
+
+  # Remove define name
+  version = version.replace(define, "").strip()
+
+  # Remove the code after the version number.
+  version_end = version.find(" ")
+  if version_end != -1:
+    if version_end == 0:
+      auto_configure_fail(
+          "Cannot extract the version from line containing '%s' in %s" %
+          (define, str(h_path)),)
+    version = version[:version_end].strip()
+  return version
+
 
 def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
-    """Detects the version of cuDNN installed on the system.
+  """Detects the version of cuDNN installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -577,68 +608,68 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
     Returns:
       A string containing the version of cuDNN.
     """
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cudnn_install_basedir,
-    )
-    major_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MAJOR,
-    )
-    minor_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MINOR,
-    )
-    patch_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_PATCHLEVEL,
-    )
-    full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-
-    # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDNN_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        cudnn_h_path = repository_ctx.path("%s/include/cudnn.h" %
-                                           cudnn_install_basedir)
-        auto_configure_fail(
-            ("cuDNN version detected from %s (%s) does not match " +
-             "TF_CUDNN_VERSION (%s)") %
-            (str(cudnn_h_path), full_version, environ_version),
-        )
-
-    # We only use the major version since we use the libcudnn libraries that are
-    # only versioned with the major version (e.g. libcudnn.so.5).
-    version = major_version
-    if cpu_value == "Windows":
-        version = "64_" + version
-    return version
-
-def _compute_capabilities(repository_ctx):
-    """Returns a list of strings representing cuda compute capabilities."""
-    if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
-        return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-    capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
-    capabilities = capabilities_str.split(",")
-    for capability in capabilities:
-        # Workaround for Skylark's lack of support for regex. This check should
-        # be equivalent to checking:
-        #     if re.match("[0-9]+.[0-9]+", capability) == None:
-        parts = capability.split(".")
-        if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
-            auto_configure_fail("Invalid compute capability: %s" % capability)
-    return capabilities
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cudnn_install_basedir,
+  )
+  major_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MAJOR,
+  )
+  minor_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MINOR,
+  )
+  patch_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_PATCHLEVEL,
+  )
+  full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+
+  # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    cudnn_h_path = repository_ctx.path(
+        "%s/include/cudnn.h" % cudnn_install_basedir)
+    auto_configure_fail(("cuDNN version detected from %s (%s) does not match " +
+                         "TF_CUDNN_VERSION (%s)") %
+                        (str(cudnn_h_path), full_version, environ_version),)
+
+  # We only use the major version since we use the libcudnn libraries that are
+  # only versioned with the major version (e.g. libcudnn.so.5).
+  version = major_version
+  if cpu_value == "Windows":
+    version = "64_" + version
+  return version
+
+
+def compute_capabilities(repository_ctx):
+  """Returns a list of strings representing cuda compute capabilities."""
+  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
+    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
+  capabilities = capabilities_str.split(",")
+  for capability in capabilities:
+    # Workaround for Skylark's lack of support for regex. This check should
+    # be equivalent to checking:
+    #     if re.match("[0-9]+.[0-9]+", capability) == None:
+    parts = capability.split(".")
+    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
+      auto_configure_fail("Invalid compute capability: %s" % capability)
+  return capabilities
+
 
 def get_cpu_value(repository_ctx):
-    """Returns the name of the host operating system.
+  """Returns the name of the host operating system.
 
     Args:
       repository_ctx: The repository context.
@@ -646,20 +677,22 @@ def get_cpu_value(repository_ctx):
     Returns:
       A string containing the name of the host operating system.
     """
-    os_name = repository_ctx.os.name.lower()
-    if os_name.startswith("mac os"):
-        return "Darwin"
-    if os_name.find("windows") != -1:
-        return "Windows"
-    result = repository_ctx.execute(["uname", "-s"])
-    return result.stdout.strip()
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
 
 def _is_windows(repository_ctx):
-    """Returns true if the host operating system is windows."""
-    return get_cpu_value(repository_ctx) == "Windows"
+  """Returns true if the host operating system is windows."""
+  return get_cpu_value(repository_ctx) == "Windows"
+
 
 def _lib_name(lib, cpu_value, version = "", static = False):
-    """Constructs the platform-specific name of a library.
+  """Constructs the platform-specific name of a library.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -670,23 +703,24 @@ def _lib_name(lib, cpu_value, version = "", static = False):
     Returns:
       The platform-specific name of the library.
     """
-    if cpu_value in ("Linux", "FreeBSD"):
-        if static:
-            return "lib%s.a" % lib
-        else:
-            if version:
-                version = ".%s" % version
-            return "lib%s.so%s" % (lib, version)
-    elif cpu_value == "Windows":
-        return "%s.lib" % lib
-    elif cpu_value == "Darwin":
-        if static:
-            return "lib%s.a" % lib
-        elif version:
-            version = ".%s" % version
-        return "lib%s%s.dylib" % (lib, version)
+  if cpu_value in ("Linux", "FreeBSD"):
+    if static:
+      return "lib%s.a" % lib
     else:
-        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+    return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+    if static:
+      return "lib%s.a" % lib
+    elif version:
+      version = ".%s" % version
+    return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
 
 def _find_cuda_lib(
         lib,
@@ -695,7 +729,7 @@ def _find_cuda_lib(
         basedir,
         version = "",
         static = False):
-    """Finds the given CUDA or cuDNN library on the system.
+  """Finds the given CUDA or cuDNN library on the system.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -710,15 +744,16 @@ def _find_cuda_lib(
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(lib, cpu_value, version, static)
-    for relative_path in CUDA_LIB_PATHS:
-        path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
-    auto_configure_fail("Cannot find cuda library %s" % file_name)
+  file_name = _lib_name(lib, cpu_value, version, static)
+  for relative_path in CUDA_LIB_PATHS:
+    path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+  auto_configure_fail("Cannot find cuda library %s" % file_name)
+
 
 def _find_cupti_header_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cupti.h
+  """Returns the path to the directory containing cupti.h
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -730,14 +765,17 @@ def _find_cupti_header_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the cupti header.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_HEADER_PATHS:
-        if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_HEADER_PATHS:
+    if repository_ctx.path(
+        "%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cupti.h under %s" % ", ".join(
+      [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+
 
 def _find_cupti_lib(repository_ctx, cuda_config):
-    """Finds the cupti library on the system.
+  """Finds the cupti library on the system.
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -751,23 +789,23 @@ def _find_cupti_lib(repository_ctx, cuda_config):
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(
-        "cupti",
-        cuda_config.cpu_value,
-        cuda_config.cuda_version,
-    )
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_LIB_PATHS:
-        path = repository_ctx.path(
-            "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),
-        )
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
+  file_name = _lib_name(
+      "cupti",
+      cuda_config.cpu_value,
+      cuda_config.cuda_version,
+  )
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_LIB_PATHS:
+    path = repository_ctx.path(
+        "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),)
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+
+  auto_configure_fail("Cannot find cupti library %s" % file_name)
 
-    auto_configure_fail("Cannot find cupti library %s" % file_name)
 
 def _find_libs(repository_ctx, cuda_config):
-    """Returns the CUDA and cuDNN libraries on the system.
+  """Returns the CUDA and cuDNN libraries on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -776,64 +814,75 @@ def _find_libs(repository_ctx, cuda_config):
     Returns:
       Map of library names to structs of filename and path.
     """
-    cpu_value = cuda_config.cpu_value
-    return {
-        "cuda": _find_cuda_lib("cuda", repository_ctx, cpu_value, cuda_config.cuda_toolkit_path),
-        "cudart": _find_cuda_lib(
-            "cudart",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudart_static": _find_cuda_lib(
-            "cudart_static",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-            static = True,
-        ),
-        "cublas": _find_cuda_lib(
-            "cublas",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cusolver": _find_cuda_lib(
-            "cusolver",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "curand": _find_cuda_lib(
-            "curand",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cufft": _find_cuda_lib(
-            "cufft",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudnn": _find_cuda_lib(
-            "cudnn",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cudnn_install_basedir,
-            cuda_config.cudnn_version,
-        ),
-        "cupti": _find_cupti_lib(repository_ctx, cuda_config),
-    }
+  cpu_value = cuda_config.cpu_value
+  return {
+      "cuda":
+          _find_cuda_lib("cuda", repository_ctx, cpu_value,
+                         cuda_config.cuda_toolkit_path),
+      "cudart":
+          _find_cuda_lib(
+              "cudart",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudart_static":
+          _find_cuda_lib(
+              "cudart_static",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+              static=True,
+          ),
+      "cublas":
+          _find_cuda_lib(
+              "cublas",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cusolver":
+          _find_cuda_lib(
+              "cusolver",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "curand":
+          _find_cuda_lib(
+              "curand",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cufft":
+          _find_cuda_lib(
+              "cufft",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudnn":
+          _find_cuda_lib(
+              "cudnn",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cudnn_install_basedir,
+              cuda_config.cudnn_version,
+          ),
+      "cupti":
+          _find_cupti_lib(repository_ctx, cuda_config),
+  }
+
 
 def _find_cuda_include_path(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cuda.h
+  """Returns the path to the directory containing cuda.h
 
     Args:
       repository_ctx: The repository context.
@@ -842,14 +891,16 @@ def _find_cuda_include_path(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+
 
 def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
-    """Returns the path to the directory containing cudnn.h
+  """Returns the path to the directory containing cudnn.h
 
     Args:
       repository_ctx: The repository context.
@@ -859,15 +910,17 @@ def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
     Returns:
       The path of the directory containing the cudnn header.
     """
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
-            return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
-    if repository_ctx.path("/usr/include/cudnn.h").exists:
-        return "/usr/include"
-    auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
+      return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
+  if repository_ctx.path("/usr/include/cudnn.h").exists:
+    return "/usr/include"
+  auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+
 
 def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing libdevice in bitcode format.
+  """Returns the path to the directory containing libdevice in bitcode format.
 
     Args:
       repository_ctx: The repository context.
@@ -876,19 +929,23 @@ def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for libdevice_file in NVVM_LIBDEVICE_FILES:
-        for relative_path in NVVM_LIBDEVICE_PATHS:
-            if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path, libdevice_file)).exists:
-                return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for libdevice_file in NVVM_LIBDEVICE_FILES:
+    for relative_path in NVVM_LIBDEVICE_PATHS:
+      if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path,
+                                          libdevice_file)).exists:
+        return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail(
+      "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+
 
 def _cudart_static_linkopt(cpu_value):
-    """Returns additional platform-specific linkopts for cudart."""
-    return "" if cpu_value == "Darwin" else "\"-lrt\","
+  """Returns additional platform-specific linkopts for cudart."""
+  return "" if cpu_value == "Darwin" else "\"-lrt\","
+
 
 def _get_cuda_config(repository_ctx):
-    """Detects and returns information about the CUDA installation on the system.
+  """Detects and returns information about the CUDA installation on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -902,35 +959,39 @@ def _get_cuda_config(repository_ctx):
         compute_capabilities: A list of the system's CUDA compute capabilities.
         cpu_value: The name of the host operating system.
     """
-    cpu_value = get_cpu_value(repository_ctx)
-    cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
-    cuda_version = _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value)
-    cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
-    cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value)
-    return struct(
-        cuda_toolkit_path = cuda_toolkit_path,
-        cudnn_install_basedir = cudnn_install_basedir,
-        cuda_version = cuda_version,
-        cudnn_version = cudnn_version,
-        compute_capabilities = _compute_capabilities(repository_ctx),
-        cpu_value = cpu_value,
-    )
+  cpu_value = get_cpu_value(repository_ctx)
+  toolkit_path = cuda_toolkit_path(repository_ctx)
+  cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value)
+  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
+  cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir,
+                                 cpu_value)
+  return struct(
+      cuda_toolkit_path=toolkit_path,
+      cudnn_install_basedir=cudnn_install_basedir,
+      cuda_version=cuda_version,
+      cudnn_version=cudnn_version,
+      compute_capabilities=compute_capabilities(repository_ctx),
+      cpu_value=cpu_value,
+  )
+
 
 def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
-    if not out:
-        out = tpl.replace(":", "/")
-    repository_ctx.template(
-        out,
-        Label("//third_party/gpus/%s.tpl" % tpl),
-        substitutions,
-    )
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions,
+  )
+
 
 def _file(repository_ctx, label):
-    repository_ctx.template(
-        label.replace(":", "/"),
-        Label("//third_party/gpus/%s.tpl" % label),
-        {},
-    )
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {},
+  )
+
 
 _DUMMY_CROSSTOOL_BZL_FILE = """
 def error_gpu_disabled():
@@ -958,81 +1019,99 @@ error_gpu_disabled()
 """
 
 def _create_dummy_repository(repository_ctx):
-    cpu_value = get_cpu_value(repository_ctx)
+  cpu_value = get_cpu_value(repository_ctx)
+
+  # Set up BUILD file for cuda/.
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}": "False",
+          "%{cuda_extra_copts}": "[]",
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              _lib_name("cuda", cpu_value),
+          "%{cudart_static_lib}":
+              _lib_name(
+                  "cudart_static",
+                  cpu_value,
+                  static=True,
+              ),
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cpu_value),
+          "%{cudart_lib}":
+              _lib_name("cudart", cpu_value),
+          "%{cublas_lib}":
+              _lib_name("cublas", cpu_value),
+          "%{cusolver_lib}":
+              _lib_name("cusolver", cpu_value),
+          "%{cudnn_lib}":
+              _lib_name("cudnn", cpu_value),
+          "%{cufft_lib}":
+              _lib_name("cufft", cpu_value),
+          "%{curand_lib}":
+              _lib_name("curand", cpu_value),
+          "%{cupti_lib}":
+              _lib_name("cupti", cpu_value),
+          "%{cuda_include_genrules}":
+              "",
+          "%{cuda_headers}":
+              "",
+      },
+  )
 
-    # Set up BUILD file for cuda/.
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "False",
-            "%{cuda_extra_copts}": "[]",
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": _lib_name("cuda", cpu_value),
-            "%{cudart_static_lib}": _lib_name(
-                "cudart_static",
-                cpu_value,
-                static = True,
-            ),
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
-            "%{cudart_lib}": _lib_name("cudart", cpu_value),
-            "%{cublas_lib}": _lib_name("cublas", cpu_value),
-            "%{cusolver_lib}": _lib_name("cusolver", cpu_value),
-            "%{cudnn_lib}": _lib_name("cudnn", cpu_value),
-            "%{cufft_lib}": _lib_name("cufft", cpu_value),
-            "%{curand_lib}": _lib_name("curand", cpu_value),
-            "%{cupti_lib}": _lib_name("cupti", cpu_value),
-            "%{cuda_include_genrules}": "",
-            "%{cuda_headers}": "",
-        },
-    )
+  # Create dummy files for the CUDA toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:cuda.
+  repository_ctx.file("cuda/cuda/include/cuda.h", "")
+  repository_ctx.file("cuda/cuda/include/cublas.h", "")
+  repository_ctx.file("cuda/cuda/include/cudnn.h", "")
+  repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
+  repository_ctx.file(
+      "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              _DEFAULT_CUDA_VERSION,
+          "%{cudnn_version}":
+              _DEFAULT_CUDNN_VERSION,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+              ]),
+          "%{cuda_toolkit_path}":
+              _DEFAULT_CUDA_TOOLKIT_PATH,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
 
-    # Create dummy files for the CUDA toolkit since they are still required by
-    # tensorflow/core/platform/default/build_config:cuda.
-    repository_ctx.file("cuda/cuda/include/cuda.h", "")
-    repository_ctx.file("cuda/cuda/include/cublas.h", "")
-    repository_ctx.file("cuda/cuda/include/cudnn.h", "")
-    repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
-    _tpl(
-        repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": _DEFAULT_CUDA_VERSION,
-            "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
-            "%{cuda_compute_capabilities}": ",".join([
-                "CudaVersion(\"%s\")" % c
-                for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-            ]),
-            "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH,
-        },
-        "cuda/cuda/cuda_config.h",
-    )
+  # If cuda_configure is not configured to build with GPU support, and the user
+  # attempts to build with --config=cuda, add a dummy build rule to intercept
+  # this and fail with an actionable error message.
+  repository_ctx.file(
+      "crosstool/error_gpu_disabled.bzl",
+      _DUMMY_CROSSTOOL_BZL_FILE,
+  )
+  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
-    # If cuda_configure is not configured to build with GPU support, and the user
-    # attempts to build with --config=cuda, add a dummy build rule to intercept
-    # this and fail with an actionable error message.
-    repository_ctx.file(
-        "crosstool/error_gpu_disabled.bzl",
-        _DUMMY_CROSSTOOL_BZL_FILE,
-    )
-    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
 def _execute(
         repository_ctx,
@@ -1040,35 +1119,35 @@ def _execute(
         error_msg = None,
         error_details = None,
         empty_stdout_fine = False):
-    """Executes an arbitrary shell command.
+  """Executes an arbitrary shell command.
 
     Args:
       repository_ctx: the repository_ctx object
       cmdline: list of strings, the command to execute
       error_msg: string, a summary of the error if the command fails
       error_details: string, details about the error or steps to fix it
-      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
-        it's an error
-    Return:
-      the result of repository_ctx.execute(cmdline)
-    """
-    result = repository_ctx.execute(cmdline)
-    if result.stderr or not (empty_stdout_fine or result.stdout):
-        auto_configure_fail(
-            "\n".join([
-                error_msg.strip() if error_msg else "Repository command failed",
-                result.stderr.strip(),
-                error_details if error_details else "",
-            ]),
-        )
-    return result
+      empty_stdout_fine: bool, if True, an empty stdout result is fine,
+        otherwise it's an error
+    Return: the result of repository_ctx.execute(cmdline)
+  """
+  result = repository_ctx.execute(cmdline)
+  if result.stderr or not (empty_stdout_fine or result.stdout):
+    auto_configure_fail(
+        "\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else "",
+        ]),)
+  return result
+
 
 def _norm_path(path):
-    """Returns a path with '/' and remove the trailing slash."""
-    path = path.replace("\\", "/")
-    if path[-1] == "/":
-        path = path[:-1]
-    return path
+  """Returns a path with '/' and remove the trailing slash."""
+  path = path.replace("\\", "/")
+  if path[-1] == "/":
+    path = path[:-1]
+  return path
+
 
 def symlink_genrule_for_dir(
         repository_ctx,
@@ -1077,167 +1156,174 @@ def symlink_genrule_for_dir(
         genrule_name,
         src_files = [],
         dest_files = []):
-    """Returns a genrule to symlink(or copy if on Windows) a set of files.
+  """Returns a genrule to symlink(or copy if on Windows) a set of files.
 
     If src_dir is passed, files will be read from the given directory; otherwise
     we assume files are in src_files and dest_files
     """
-    if src_dir != None:
-        src_dir = _norm_path(src_dir)
-        dest_dir = _norm_path(dest_dir)
-        files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
-
-        # Create a list with the src_dir stripped to use for outputs.
-        dest_files = files.replace(src_dir, "").splitlines()
-        src_files = files.splitlines()
-    command = []
-    if not _is_windows(repository_ctx):
-        # We clear folders that might have been generated previously to avoid
-        # undesired inclusions
-        command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
-        command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
-        command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
-        command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
-    outs = []
-    for i in range(len(dest_files)):
-        if dest_files[i] != "":
-            # If we have only one file to link we do not want to use the dest_dir, as
-            # $(@D) will include the full path to the file.
-            dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
-
-            # On Windows, symlink is not supported, so we just copy all the files.
-            cmd = "cp -f" if _is_windows(repository_ctx) else "ln -s"
-            command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
-            outs.append('        "' + dest_dir + dest_files[i] + '",')
-    genrule = _genrule(
-        src_dir,
-        genrule_name,
-        " && ".join(command),
-        "\n".join(outs),
-    )
-    return genrule
+  if src_dir != None:
+    src_dir = _norm_path(src_dir)
+    dest_dir = _norm_path(dest_dir)
+    files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
+
+    # Create a list with the src_dir stripped to use for outputs.
+    dest_files = files.replace(src_dir, "").splitlines()
+    src_files = files.splitlines()
+  command = []
+  if not _is_windows(repository_ctx):
+    # We clear folders that might have been generated previously to avoid
+    # undesired inclusions
+    command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
+    command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+    command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+    command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
+  outs = []
+  for i in range(len(dest_files)):
+    if dest_files[i] != "":
+      # If we have only one file to link we do not want to use the dest_dir, as
+      # $(@D) will include the full path to the file.
+      dest = "$(@D)/" + dest_dir + dest_files[i] if len(
+          dest_files) != 1 else "$(@D)/" + dest_files[i]
+
+      # Copy the headers to create a sandboxable setup.
+      cmd = "cp -f"
+      command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+      outs.append('        "' + dest_dir + dest_files[i] + '",')
+  genrule = _genrule(
+      src_dir,
+      genrule_name,
+      " && ".join(command),
+      "\n".join(outs),
+  )
+  return genrule
+
 
 def _genrule(src_dir, genrule_name, command, outs):
-    """Returns a string with a genrule.
+  """Returns a string with a genrule.
 
     Genrule executes the given command and produces the given outputs.
     """
-    return (
-        "genrule(\n" +
-        '    name = "' +
-        genrule_name + '",\n' +
-        "    outs = [\n" +
-        outs +
-        "\n    ],\n" +
-        '    cmd = """\n' +
-        command +
-        '\n   """,\n' +
-        ")\n"
-    )
+  return (
+      "genrule(\n" + '    name = "' + genrule_name + '",\n' + "    outs = [\n" +
+      outs + "\n    ],\n" + '    cmd = """\n' + command + '\n   """,\n' + ")\n")
+
 
 def _read_dir(repository_ctx, src_dir):
-    """Returns a string with all files in a directory.
+  """Returns a string with all files in a directory.
 
     Finds all files inside a directory, traversing subfolders and following
     symlinks. The returned string contains the full path of all files
     separated by line breaks.
     """
-    if _is_windows(repository_ctx):
-        src_dir = src_dir.replace("/", "\\")
-        find_result = _execute(
-            repository_ctx,
-            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
-            empty_stdout_fine = True,
-        )
+  if _is_windows(repository_ctx):
+    src_dir = src_dir.replace("/", "\\")
+    find_result = _execute(
+        repository_ctx,
+        ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
+        empty_stdout_fine=True,
+    )
+
+    # src_files will be used in genrule.outs where the paths must
+    # use forward slashes.
+    result = find_result.stdout.replace("\\", "/")
+  else:
+    find_result = _execute(
+        repository_ctx,
+        ["find", src_dir, "-follow", "-type", "f"],
+        empty_stdout_fine=True,
+    )
+    result = find_result.stdout
+  return result
 
-        # src_files will be used in genrule.outs where the paths must
-        # use forward slashes.
-        result = find_result.stdout.replace("\\", "/")
-    else:
-        find_result = _execute(
-            repository_ctx,
-            ["find", src_dir, "-follow", "-type", "f"],
-            empty_stdout_fine = True,
-        )
-        result = find_result.stdout
-    return result
 
 def _flag_enabled(repository_ctx, flag_name):
-    if flag_name in repository_ctx.os.environ:
-        value = repository_ctx.os.environ[flag_name].strip()
-        return value == "1"
-    return False
+  if flag_name in repository_ctx.os.environ:
+    value = repository_ctx.os.environ[flag_name].strip()
+    return value == "1"
+  return False
+
 
 def _use_cuda_clang(repository_ctx):
-    return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+  return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+
 
 def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
-    if _use_cuda_clang(repository_ctx):
-        capability_flags = ["--cuda-gpu-arch=sm_" +
-                            cap.replace(".", "") for cap in compute_capabilities]
-    else:
-        # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
-        capability_flags = []
-    return str(capability_flags)
+  if _use_cuda_clang(repository_ctx):
+    capability_flags = [
+        "--cuda-gpu-arch=sm_" + cap.replace(".", "")
+        for cap in compute_capabilities
+    ]
+  else:
+    # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
+    # TODO(csigg): Make this consistent with cuda clang and pass to crosstool.
+    capability_flags = []
+  return str(capability_flags)
+
 
 def _create_local_cuda_repository(repository_ctx):
-    """Creates the repository containing files set up to build with CUDA."""
-    cuda_config = _get_cuda_config(repository_ctx)
+  """Creates the repository containing files set up to build with CUDA."""
+  cuda_config = _get_cuda_config(repository_ctx)
 
-    cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cuda_config.cudnn_install_basedir,
-    )
-    cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
-    nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
-
-    # Set up symbolic links for the cuda toolkit by creating genrules to do
-    # symlinking. We create one genrule for each directory we want to track under
-    # cuda_toolkit_path
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    genrules = [symlink_genrule_for_dir(
-        repository_ctx,
-        cuda_include_path,
-        "cuda/include",
-        "cuda-include",
-    )]
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        nvvm_libdevice_dir,
-        "cuda/nvvm/libdevice",
-        "cuda-nvvm",
-    ))
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        cupti_header_dir,
-        "cuda/extras/CUPTI/include",
-        "cuda-extras",
-    ))
-
-    cuda_libs = _find_libs(repository_ctx, cuda_config)
-    cuda_lib_src = []
-    cuda_lib_dest = []
-    for lib in cuda_libs.values():
-        cuda_lib_src.append(lib.path)
-        cuda_lib_dest.append("cuda/lib/" + lib.file_name)
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        None,
-        "",
-        "cuda-lib",
-        cuda_lib_src,
-        cuda_lib_dest,
-    ))
-
-    # Set up the symbolic links for cudnn if cndnn was not installed to
-    # CUDA_TOOLKIT_PATH.
-    included_files = _read_dir(repository_ctx, cuda_include_path).replace(
-        cuda_include_path,
-        "",
-    ).splitlines()
-    if "/cudnn.h" not in included_files:
-        genrules.append(symlink_genrule_for_dir(
+  cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cuda_config.cudnn_install_basedir,
+  )
+  cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
+  nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
+
+  # Set up symbolic links for the cuda toolkit by creating genrules to do
+  # symlinking. We create one genrule for each directory we want to track under
+  # cuda_toolkit_path
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  genrules = [
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cuda_include_path,
+          "cuda/include",
+          "cuda-include",
+      )
+  ]
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          nvvm_libdevice_dir,
+          "cuda/nvvm/libdevice",
+          "cuda-nvvm",
+      ))
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cupti_header_dir,
+          "cuda/extras/CUPTI/include",
+          "cuda-extras",
+      ))
+
+  cuda_libs = _find_libs(repository_ctx, cuda_config)
+  cuda_lib_src = []
+  cuda_lib_dest = []
+  for lib in cuda_libs.values():
+    cuda_lib_src.append(lib.path)
+    cuda_lib_dest.append("cuda/lib/" + lib.file_name)
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          None,
+          "",
+          "cuda-lib",
+          cuda_lib_src,
+          cuda_lib_dest,
+      ))
+
+  # Set up the symbolic links for cudnn if cndnn was not installed to
+  # CUDA_TOOLKIT_PATH.
+  included_files = _read_dir(repository_ctx, cuda_include_path).replace(
+      cuda_include_path,
+      "",
+  ).splitlines()
+  if "/cudnn.h" not in included_files:
+    genrules.append(
+        symlink_genrule_for_dir(
             repository_ctx,
             None,
             "cuda/include/",
@@ -1245,217 +1331,229 @@ def _create_local_cuda_repository(repository_ctx):
             [cudnn_header_dir + "/cudnn.h"],
             ["cudnn.h"],
         ))
-    else:
-        genrules.append(
-            "filegroup(\n" +
-            '    name = "cudnn-include",\n' +
-            "    srcs = [],\n" +
-            ")\n",
-        )
-
-    # Set up BUILD file for cuda/
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                cuda_config.compute_capabilities,
-            ),
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": cuda_libs["cuda"].file_name,
-            "%{cudart_static_lib}": cuda_libs["cudart_static"].file_name,
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(
-                cuda_config.cpu_value,
-            ),
-            "%{cudart_lib}": cuda_libs["cudart"].file_name,
-            "%{cublas_lib}": cuda_libs["cublas"].file_name,
-            "%{cusolver_lib}": cuda_libs["cusolver"].file_name,
-            "%{cudnn_lib}": cuda_libs["cudnn"].file_name,
-            "%{cufft_lib}": cuda_libs["cufft"].file_name,
-            "%{curand_lib}": cuda_libs["curand"].file_name,
-            "%{cupti_lib}": cuda_libs["cupti"].file_name,
-            "%{cuda_include_genrules}": "\n".join(genrules),
-            "%{cuda_headers}": ('":cuda-include",\n' +
-                                '        ":cudnn-include",'),
-        },
-        "cuda/BUILD",
-    )
-
-    is_cuda_clang = _use_cuda_clang(repository_ctx)
+  else:
+    genrules.append(
+        "filegroup(\n" + '    name = "cudnn-include",\n' + "    srcs = [],\n" +
+        ")\n",)
+
+  # Set up BUILD file for cuda/
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  cuda_config.compute_capabilities,
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              cuda_libs["cuda"].file_name,
+          "%{cudart_static_lib}":
+              cuda_libs["cudart_static"].file_name,
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cuda_config.cpu_value,),
+          "%{cudart_lib}":
+              cuda_libs["cudart"].file_name,
+          "%{cublas_lib}":
+              cuda_libs["cublas"].file_name,
+          "%{cusolver_lib}":
+              cuda_libs["cusolver"].file_name,
+          "%{cudnn_lib}":
+              cuda_libs["cudnn"].file_name,
+          "%{cufft_lib}":
+              cuda_libs["cufft"].file_name,
+          "%{curand_lib}":
+              cuda_libs["curand"].file_name,
+          "%{cupti_lib}":
+              cuda_libs["cupti"].file_name,
+          "%{cuda_include_genrules}":
+              "\n".join(genrules),
+          "%{cuda_headers}": ('":cuda-include",\n' + '        ":cudnn-include",'
+                             ),
+      },
+      "cuda/BUILD",
+  )
 
-    should_download_clang = is_cuda_clang and _flag_enabled(
-        repository_ctx,
-        _TF_DOWNLOAD_CLANG,
-    )
-    if should_download_clang:
-        download_clang(repository_ctx, "crosstool/extra_tools")
-
-    # Set up crosstool/
-    cc = find_cc(repository_ctx)
-    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
-
-    host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
-    cuda_defines = {}
-    # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
-    # https://github.com/bazelbuild/bazel/issues/760).
-    # However, this stops our custom clang toolchain from picking the provided
-    # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
-    # toolchain.
-    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
-    #       flag from the CROSSTOOL completely (see
-    #       https://github.com/bazelbuild/bazel/issues/5634)
-    if should_download_clang:
-      cuda_defines["%{linker_bin_path_flag}"] = ""
-    else:
-      cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+  is_cuda_clang = _use_cuda_clang(repository_ctx)
 
-    if is_cuda_clang:
-        cuda_defines["%{host_compiler_path}"] = str(cc)
-        cuda_defines["%{host_compiler_warnings}"] = """
+  should_download_clang = is_cuda_clang and _flag_enabled(
+      repository_ctx,
+      _TF_DOWNLOAD_CLANG,
+  )
+  if should_download_clang:
+    download_clang(repository_ctx, "crosstool/extra_tools")
+
+  # Set up crosstool/
+  cc = find_cc(repository_ctx)
+  cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
+
+  host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
+  cuda_defines = {}
+  # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
+  # https://github.com/bazelbuild/bazel/issues/760).
+  # However, this stops our custom clang toolchain from picking the provided
+  # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
+  # toolchain.
+  # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
+  #       flag from the CROSSTOOL completely (see
+  #       https://github.com/bazelbuild/bazel/issues/5634)
+  if should_download_clang:
+    cuda_defines["%{linker_bin_path_flag}"] = ""
+  else:
+    cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+
+  if is_cuda_clang:
+    cuda_defines["%{host_compiler_path}"] = str(cc)
+    cuda_defines["%{host_compiler_warnings}"] = """
         # Some parts of the codebase set -Werror and hit this warning, so
         # switch it off for now.
         flag: "-Wno-invalid-partial-specialization"
     """
-        cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
-        _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
-        repository_ctx.file("crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
-    else:
-        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
-        cuda_defines["%{host_compiler_warnings}"] = ""
-
-        # TODO(klimek): We currently need to inject "/" as builtin directory path
-        # to disable bazel's dependency checks.
-        # The problem is that:
-        # - the python rules symlink the python headers into the bazel root
-        # - the rules use 'includes' in the BUILD file to redirect includes of the
-        #   python headers through those paths
-        # - bazel currently uses -isystem for include paths specified via 'includes'
-        # - gcc follows symlinks when resolving files via -isystem paths, and puts
-        #   the resolved paths into the .d file, which makes the dependency check
-        #   fail for bazel
-        # There are multiple possible ways to solve this:
-        # 1. make bazel not use -isystem for paths specified via 'includes'
-        # 2. cp the headers instead of symlinking them
-        #
-        # Once this is fixed, the right builtin directory path is:
-        # (host_compiler_includes +
-        #    "\n  cxx_builtin_include_directory: \"%s\"" % cuda_include_path)
-        # The cuda directory needs to be passed, as there is currently no rule
-        # providing the cuda headers in the same way the python headers are
-        # provided.
-        cuda_defines["%{host_compiler_includes}"] = "\n  cxx_builtin_include_directory: \"/\""
-        nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
-                                            (
-                                                cuda_config.cuda_toolkit_path,
-                                                ".exe" if _is_windows(repository_ctx) else "",
-                                            )))
-        _tpl(
-            repository_ctx,
-            "crosstool:BUILD",
-            {
-                "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
-                "%{win_linker_files}": ":windows_msvc_wrapper_files",
-            },
-        )
-        wrapper_defines = {
-            "%{cpu_compiler}": str(cc),
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{nvcc_path}": nvcc_path,
-            "%{gcc_host_compiler_path}": str(cc),
-            "%{cuda_compute_capabilities}": ", ".join(
-                ["\"%s\"" % c for c in cuda_config.compute_capabilities],
-            ),
-            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
-        }
-        _tpl(
-            repository_ctx,
-            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.py",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.bat",
-            {
-                "%{python_binary}": _get_python_bin(repository_ctx),
-            },
-        )
-
+    cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
+    _tpl(repository_ctx, "crosstool:BUILD", {
+        "%{linker_files}": ":empty",
+        "%{win_linker_files}": ":empty"
+    })
+    repository_ctx.file(
+        "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
+  else:
+    cuda_defines[
+        "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
+    cuda_defines["%{host_compiler_warnings}"] = ""
+
+    # nvcc has the system include paths built in and will automatically
+    # search them; we cannot work around that, so we add the relevant cuda
+    # system paths to the allowed compiler specific include paths.
+    cuda_defines["%{host_compiler_includes}"] = (
+        host_compiler_includes + "\n" + _cuda_include_path(
+            repository_ctx, cuda_config) +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
+    nvcc_path = str(
+        repository_ctx.path("%s/bin/nvcc%s" % (
+            cuda_config.cuda_toolkit_path,
+            ".exe" if _is_windows(repository_ctx) else "",
+        )))
     _tpl(
         repository_ctx,
-        "crosstool:CROSSTOOL",
-        cuda_defines + _get_win_cuda_defines(repository_ctx),
-        out = "crosstool/CROSSTOOL",
+        "crosstool:BUILD",
+        {
+            "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
+            "%{win_linker_files}": ":windows_msvc_wrapper_files",
+        },
     )
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
+    wrapper_defines = {
+        "%{cpu_compiler}":
+            str(cc),
+        "%{cuda_version}":
+            cuda_config.cuda_version,
+        "%{nvcc_path}":
+            nvcc_path,
+        "%{gcc_host_compiler_path}":
+            str(cc),
+        "%{cuda_compute_capabilities}":
+            ", ".join(
+                ["\"%s\"" % c for c in cuda_config.compute_capabilities],),
+        "%{nvcc_tmp_dir}":
+            _get_nvcc_tmp_dir_for_windows(repository_ctx),
+    }
     _tpl(
         repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{cudnn_version}": cuda_config.cudnn_version,
-            "%{cuda_compute_capabilities}": ",".join(
-                [
-                    "CudaVersion(\"%s\")" % c
-                    for c in cuda_config.compute_capabilities
-                ],
-            ),
-            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
-        },
-        "cuda/cuda/cuda_config.h",
+        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+        wrapper_defines,
     )
-
-def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
-    """Creates pointers to a remotely configured repo set up to build with CUDA."""
     _tpl(
         repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                _compute_capabilities(repository_ctx),
-            ),
-        },
+        "crosstool:windows/msvc_wrapper_for_nvcc.py",
+        wrapper_defines,
     )
     _tpl(
         repository_ctx,
-        "cuda:remote.BUILD",
+        "crosstool:windows/msvc_wrapper_for_nvcc.bat",
         {
-            "%{remote_cuda_repo}": remote_config_repo,
+            "%{python_binary}": _get_python_bin(repository_ctx),
         },
-        "cuda/BUILD",
     )
-    _tpl(repository_ctx, "crosstool:remote.BUILD", {
-        "%{remote_cuda_repo}": remote_config_repo,
-    }, "crosstool/BUILD")
+
+  _tpl(
+      repository_ctx,
+      "crosstool:CROSSTOOL",
+      cuda_defines + _get_win_cuda_defines(repository_ctx),
+      out="crosstool/CROSSTOOL",
+  )
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              cuda_config.cuda_version,
+          "%{cudnn_version}":
+              cuda_config.cudnn_version,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in cuda_config.compute_capabilities
+              ],),
+          "%{cuda_toolkit_path}":
+              cuda_config.cuda_toolkit_path,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
+
+
+def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
+  """Creates pointers to a remotely configured repo set up to build with CUDA."""
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  compute_capabilities(repository_ctx),
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:remote.BUILD",
+      {
+          "%{remote_cuda_repo}": remote_config_repo,
+      },
+      "cuda/BUILD",
+  )
+  _tpl(repository_ctx, "crosstool:remote.BUILD", {
+      "%{remote_cuda_repo}": remote_config_repo,
+  }, "crosstool/BUILD")
+
 
 def _cuda_autoconf_impl(repository_ctx):
-    """Implementation of the cuda_autoconf repository rule."""
-    if not _enable_cuda(repository_ctx):
-        _create_dummy_repository(repository_ctx)
-    elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
-        _create_remote_cuda_repository(
-            repository_ctx,
-            repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
-        )
-    else:
-        _create_local_cuda_repository(repository_ctx)
+  """Implementation of the cuda_autoconf repository rule."""
+  if not _enable_cuda(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
+    _create_remote_cuda_repository(
+        repository_ctx,
+        repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
+    )
+  else:
+    _create_local_cuda_repository(repository_ctx)
+
 
 cuda_configure = repository_rule(
     implementation = _cuda_autoconf_impl,
diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD
diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl
new file mode 100644
index 0000000000..8258bb3589
--- /dev/null
+++ b/third_party/gpus/rocm/BUILD.tpl
@@ -0,0 +1,99 @@
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+    name = "using_hipcc",
+    values = {
+        "define": "using_rocm_hipcc=true",
+    },
+)
+
+cc_library(
+    name = "rocm_headers",
+    hdrs = [
+        "rocm/rocm_config.h",
+        %{rocm_headers}
+    ],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hip",
+    srcs = ["rocm/lib/%{hip_lib}"],
+    data = ["rocm/lib/%{hip_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocblas",
+    srcs = ["rocm/lib/%{rocblas_lib}"],
+    data = ["rocm/lib/%{rocblas_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocfft",
+    srcs = ["rocm/lib/%{rocfft_lib}"],
+    data = ["rocm/lib/%{rocfft_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "hiprand",
+    srcs = ["rocm/lib/%{hiprand_lib}"],
+    data = ["rocm/lib/%{hiprand_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+        "rocm/include/rocrand",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "miopen",
+    srcs = ["rocm/lib/%{miopen_lib}"],
+    data = ["rocm/lib/%{miopen_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocm",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":rocm_headers",
+        ":hip",
+        ":rocblas",
+        ":rocfft",
+        ":hiprand",
+        ":miopen",
+    ],
+)
+
+%{rocm_include_genrules}
diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl
new file mode 100644
index 0000000000..08c59f95a0
--- /dev/null
+++ b/third_party/gpus/rocm/build_defs.bzl.tpl
@@ -0,0 +1,45 @@
+# Macros for building ROCm code.
+def if_rocm(if_true, if_false = []):
+    """Shorthand for select()'ing on whether we're building with ROCm.
+
+    Returns a select statement which evaluates to if_true if we're building
+    with ROCm enabled.  Otherwise, the select statement evaluates to if_false.
+
+    """
+    return select({
+        "@local_config_rocm//rocm:using_hipcc": if_true,
+        "//conditions:default": if_false
+    })
+
+
+def rocm_default_copts():
+    """Default options for all ROCm compilations."""
+    return if_rocm(["-x", "rocm"] + %{rocm_extra_copts})
+
+def rocm_copts(opts = []):
+    """Gets the appropriate set of copts for (maybe) ROCm compilation.
+
+      If we're doing ROCm compilation, returns copts for our particular ROCm
+      compiler.  If we're not doing ROCm compilation, returns an empty list.
+
+      """
+    return rocm_default_copts() + select({
+        "//conditions:default": [],
+        "@local_config_rocm//rocm:using_hipcc": ([
+            "",
+        ]),
+    }) + if_rocm_is_configured(opts)
+
+def rocm_is_configured():
+    """Returns true if ROCm was enabled during the configure process."""
+    return %{rocm_is_configured}
+
+def if_rocm_is_configured(x):
+    """Tests if the ROCm was enabled during the configure process.
+
+    Unlike if_rocm(), this does not require that we are building with
+    --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
+    """
+    if rocm_is_configured():
+      return x
+    return []
diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl
new file mode 100644
index 0000000000..c5f25a845c
--- /dev/null
+++ b/third_party/gpus/rocm/rocm_config.h.tpl
@@ -0,0 +1,21 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef ROCM_ROCM_CONFIG_H_
+#define ROCM_ROCM_CONFIG_H_
+
+#define TF_ROCM_TOOLKIT_PATH "/opt/rocm"
+
+#endif  // ROCM_ROCM_CONFIG_H_
diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
new file mode 100644
index 0000000000..9108639b0b
--- /dev/null
+++ b/third_party/gpus/rocm_configure.bzl
@@ -0,0 +1,784 @@
+# -*- Python -*-
+"""Repository rule for ROCm autoconfiguration.
+
+`rocm_configure` depends on the following environment variables:
+
+  * `TF_NEED_ROCM`: Whether to enable building with ROCm.
+  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+  * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
+    `/opt/rocm`.
+  * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
+    use the system default.
+  * `TF_MIOPEN_VERSION`: The version of the MIOpen library.
+  * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
+    `gfx803,gfx900`.
+"""
+
+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
+_TF_ROCM_VERSION = "TF_ROCM_VERSION"
+_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
+_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
+_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
+
+_DEFAULT_ROCM_VERSION = ""
+_DEFAULT_MIOPEN_VERSION = ""
+_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
+_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
+
+def find_cc(repository_ctx):
+    """Find the C++ compiler."""
+
+    # Return a dummy value for GCC detection here to avoid error
+    target_cc_name = "gcc"
+    cc_path_envvar = _GCC_HOST_COMPILER_PATH
+    cc_name = target_cc_name
+
+    if cc_path_envvar in repository_ctx.os.environ:
+        cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+        if cc_name_from_env:
+            cc_name = cc_name_from_env
+    if cc_name.startswith("/"):
+        # Absolute path, maybe we should make this supported by our which function.
+        return cc_name
+    cc = repository_ctx.which(cc_name)
+    if cc == None:
+        fail(("Cannot find {}, either correct your path or set the {}" +
+              " environment variable").format(target_cc_name, cc_path_envvar))
+    return cc
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+def _cxx_inc_convert(path):
+    """Convert path returned by cc -E xc++ in a complete path."""
+    path = path.strip()
+    return path
+
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+    """Compute the list of default C or C++ include directories."""
+    if lang_is_cpp:
+        lang = "c++"
+    else:
+        lang = "c"
+
+    # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+    #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
+    #       handle the case when it's disabled and no flag is passed
+    result = repository_ctx.execute([
+        cc,
+        "-no-canonical-prefixes",
+        "-E",
+        "-x" + lang,
+        "-",
+        "-v",
+    ])
+    index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+    if index1 == -1:
+        return []
+    index1 = result.stderr.find("\n", index1)
+    if index1 == -1:
+        return []
+    index2 = result.stderr.rfind("\n ")
+    if index2 == -1 or index2 < index1:
+        return []
+    index2 = result.stderr.find("\n", index2 + 1)
+    if index2 == -1:
+        inc_dirs = result.stderr[index1 + 1:]
+    else:
+        inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+    return [
+        str(repository_ctx.path(_cxx_inc_convert(p)))
+        for p in inc_dirs.split("\n")
+    ]
+
+def get_cxx_inc_directories(repository_ctx, cc):
+    """Compute the list of default C and C++ include directories."""
+
+    # For some reason `clang -xc` sometimes returns include paths that are
+    # different from the ones from `clang -xc++`. (Symlink and a dir)
+    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+    includes_cpp_set = depset(includes_cpp)
+    return includes_cpp + [
+        inc
+        for inc in includes_c
+        if inc not in includes_cpp_set
+    ]
+
+def auto_configure_fail(msg):
+    """Output failure message when rocm configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg))
+
+# END cc_configure common functions (see TODO above).
+
+def _host_compiler_includes(repository_ctx, cc):
+    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+
+    Args:
+      repository_ctx: The repository context.
+      cc: The path to the gcc host compiler.
+
+    Returns:
+      A string containing the cxx_builtin_include_directory for each of the gcc
+      host compiler include directories, which can be added to the CROSSTOOL
+      file.
+    """
+    inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+
+    # Add numpy headers
+    inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include")
+
+    entries = []
+    for inc_dir in inc_dirs:
+        entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+
+    # define TENSORFLOW_USE_ROCM
+    entries.append("  unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
+
+    return "\n".join(entries)
+
+def _rocm_include_path(repository_ctx, rocm_config):
+    """Generates the cxx_builtin_include_directory entries for rocm inc dirs.
+
+    Args:
+      repository_ctx: The repository context.
+      cc: The path to the gcc host compiler.
+
+    Returns:
+      A string containing the cxx_builtin_include_directory for each of the gcc
+      host compiler include directories, which can be added to the CROSSTOOL
+      file.
+    """
+    inc_dirs = []
+
+    # general ROCm include path
+    inc_dirs.append(rocm_config.rocm_toolkit_path + "/include")
+
+    # Add HSA headers
+    inc_dirs.append("/opt/rocm/hsa/include")
+
+    # Add HIP headers
+    inc_dirs.append("/opt/rocm/include/hip")
+    inc_dirs.append("/opt/rocm/include/hip/hcc_detail")
+
+    # Add rocrand and hiprand headers
+    inc_dirs.append("/opt/rocm/rocrand/include")
+    inc_dirs.append("/opt/rocm/hiprand/include")
+
+    # Add rocfft headers
+    inc_dirs.append("/opt/rocm/rocfft/include")
+
+    # Add rocBLAS headers
+    inc_dirs.append("/opt/rocm/rocblas/include")
+
+    # Add MIOpen headers
+    inc_dirs.append("/opt/rocm/miopen/include")
+
+    # Add hcc headers
+    inc_dirs.append("/opt/rocm/hcc/include")
+    inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/")
+    inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include")
+
+    # Newer hcc builds use/are based off of clang 8.0.0.
+    inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
+    inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
+
+    inc_entries = []
+    for inc_dir in inc_dirs:
+        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+    return "\n".join(inc_entries)
+
+def _enable_rocm(repository_ctx):
+    if "TF_NEED_ROCM" in repository_ctx.os.environ:
+        enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
+        return enable_rocm == "1"
+    return False
+
+def _rocm_toolkit_path(repository_ctx):
+    """Finds the rocm toolkit directory.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A speculative real path of the rocm toolkit install directory.
+    """
+    rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH
+    if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ:
+        rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip()
+    if not repository_ctx.path(rocm_toolkit_path).exists:
+        auto_configure_fail("Cannot find rocm toolkit path.")
+    return str(repository_ctx.path(rocm_toolkit_path).realpath)
+
+def _amdgpu_targets(repository_ctx):
+    """Returns a list of strings representing AMDGPU targets."""
+    if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ:
+        return _DEFAULT_ROCM_AMDGPU_TARGETS
+    amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS]
+    amdgpu_targets = amdgpu_targets_str.split(",")
+    for amdgpu_target in amdgpu_targets:
+        if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
+            auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
+    return amdgpu_targets
+
+def _cpu_value(repository_ctx):
+    """Returns the name of the host operating system.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A string containing the name of the host operating system.
+    """
+    os_name = repository_ctx.os.name.lower()
+    if os_name.startswith("mac os"):
+        return "Darwin"
+    if os_name.find("windows") != -1:
+        return "Windows"
+    result = repository_ctx.execute(["uname", "-s"])
+    return result.stdout.strip()
+
+def _lib_name(lib, cpu_value, version = "", static = False):
+    """Constructs the platform-specific name of a library.
+
+    Args:
+      lib: The name of the library, such as "hip"
+      cpu_value: The name of the host operating system.
+      version: The version of the library.
+      static: True the library is static or False if it is a shared object.
+
+    Returns:
+      The platform-specific name of the library.
+    """
+    if cpu_value in ("Linux"):
+        if static:
+            return "lib%s.a" % lib
+        else:
+            if version:
+                version = ".%s" % version
+            return "lib%s.so%s" % (lib, version)
+    elif cpu_value == "Windows":
+        return "%s.lib" % lib
+    elif cpu_value == "Darwin":
+        if static:
+            return "lib%s.a" % lib
+        elif version:
+            version = ".%s" % version
+        return "lib%s%s.dylib" % (lib, version)
+    else:
+        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(
+        lib,
+        repository_ctx,
+        cpu_value,
+        basedir,
+        version = "",
+        static = False):
+    """Finds the given ROCm libraries on the system.
+
+    Args:
+      lib: The name of the library, such as "hip"
+      repository_ctx: The repository context.
+      cpu_value: The name of the host operating system.
+      basedir: The install directory of ROCm.
+      version: The version of the library.
+      static: True if static library, False if shared object.
+
+    Returns:
+      Returns a struct with the following fields:
+        file_name: The basename of the library found on the system.
+        path: The full path to the library.
+    """
+    file_name = _lib_name(lib, cpu_value, version, static)
+    if cpu_value == "Linux":
+        path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name))
+        if path.exists:
+            return struct(file_name = file_name, path = str(path.realpath))
+        path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name))
+        if path.exists:
+            return struct(file_name = file_name, path = str(path.realpath))
+        path = repository_ctx.path(
+            "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name),
+        )
+        if path.exists:
+            return struct(file_name = file_name, path = str(path.realpath))
+
+    path = repository_ctx.path("%s/lib/%s" % (basedir, file_name))
+    if path.exists:
+        return struct(file_name = file_name, path = str(path.realpath))
+    path = repository_ctx.path("%s/%s" % (basedir, file_name))
+    if path.exists:
+        return struct(file_name = file_name, path = str(path.realpath))
+
+    auto_configure_fail("Cannot find rocm library %s" % file_name)
+
+def _find_libs(repository_ctx, rocm_config):
+    """Returns the ROCm libraries on the system.
+
+    Args:
+      repository_ctx: The repository context.
+      rocm_config: The ROCm config as returned by _get_rocm_config
+
+    Returns:
+      Map of library names to structs of filename and path as returned by
+      _find_rocm_lib.
+    """
+    cpu_value = rocm_config.cpu_value
+    return {
+        "hip": _find_rocm_lib(
+            "hip_hcc",
+            repository_ctx,
+            cpu_value,
+            rocm_config.rocm_toolkit_path,
+        ),
+        "rocblas": _find_rocm_lib(
+            "rocblas",
+            repository_ctx,
+            cpu_value,
+            rocm_config.rocm_toolkit_path + "/rocblas",
+        ),
+        "rocfft": _find_rocm_lib(
+            "rocfft",
+            repository_ctx,
+            cpu_value,
+            rocm_config.rocm_toolkit_path + "/rocfft",
+        ),
+        "hiprand": _find_rocm_lib(
+            "hiprand",
+            repository_ctx,
+            cpu_value,
+            rocm_config.rocm_toolkit_path + "/hiprand",
+        ),
+        "miopen": _find_rocm_lib(
+            "MIOpen",
+            repository_ctx,
+            cpu_value,
+            rocm_config.rocm_toolkit_path + "/miopen",
+        ),
+    }
+
+def _get_rocm_config(repository_ctx):
+    """Detects and returns information about the ROCm installation on the system.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A struct containing the following fields:
+        rocm_toolkit_path: The ROCm toolkit installation directory.
+        amdgpu_targets: A list of the system's AMDGPU targets.
+        cpu_value: The name of the host operating system.
+    """
+    cpu_value = _cpu_value(repository_ctx)
+    rocm_toolkit_path = _rocm_toolkit_path(repository_ctx)
+    return struct(
+        rocm_toolkit_path = rocm_toolkit_path,
+        amdgpu_targets = _amdgpu_targets(repository_ctx),
+        cpu_value = cpu_value,
+    )
+
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+    if not out:
+        out = tpl.replace(":", "/")
+    repository_ctx.template(
+        out,
+        Label("//third_party/gpus/%s.tpl" % tpl),
+        substitutions,
+    )
+
+def _file(repository_ctx, label):
+    repository_ctx.template(
+        label.replace(":", "/"),
+        Label("//third_party/gpus/%s.tpl" % label),
+        {},
+    )
+
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+  fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
+       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+       "at the prompt to build with GPU support.")
+
+  native.genrule(
+      name = "error_gen_crosstool",
+      outs = ["CROSSTOOL"],
+      cmd = "echo 'Should not be run.' && exit 1",
+  )
+
+  native.filegroup(
+      name = "crosstool",
+      srcs = [":CROSSTOOL"],
+      output_licenses = ["unencumbered"],
+  )
+"""
+
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+def _create_dummy_repository(repository_ctx):
+    cpu_value = _cpu_value(repository_ctx)
+
+    # Set up BUILD file for rocm/.
+    _tpl(
+        repository_ctx,
+        "rocm:build_defs.bzl",
+        {
+            "%{rocm_is_configured}": "False",
+            "%{rocm_extra_copts}": "[]",
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "rocm:BUILD",
+        {
+            "%{hip_lib}": _lib_name("hip", cpu_value),
+            "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
+            "%{miopen_lib}": _lib_name("miopen", cpu_value),
+            "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
+            "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
+            "%{rocm_include_genrules}": "",
+            "%{rocm_headers}": "",
+        },
+    )
+
+    # Create dummy files for the ROCm toolkit since they are still required by
+    # tensorflow/core/platform/default/build_config:rocm.
+    repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")
+
+    # Set up rocm_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    _tpl(
+        repository_ctx,
+        "rocm:rocm_config.h",
+        {
+            "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
+        },
+        "rocm/rocm/rocm_config.h",
+    )
+
+    # If rocm_configure is not configured to build with GPU support, and the user
+    # attempts to build with --config=rocm, add a dummy build rule to intercept
+    # this and fail with an actionable error message.
+    repository_ctx.file(
+        "crosstool/error_gpu_disabled.bzl",
+        _DUMMY_CROSSTOOL_BZL_FILE,
+    )
+    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
+
+def _execute(
+        repository_ctx,
+        cmdline,
+        error_msg = None,
+        error_details = None,
+        empty_stdout_fine = False):
+    """Executes an arbitrary shell command.
+
+    Args:
+      repository_ctx: the repository_ctx object
+      cmdline: list of strings, the command to execute
+      error_msg: string, a summary of the error if the command fails
+      error_details: string, details about the error or steps to fix it
+      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+        it's an error
+    Return:
+      the result of repository_ctx.execute(cmdline)
+    """
+    result = repository_ctx.execute(cmdline)
+    if result.stderr or not (empty_stdout_fine or result.stdout):
+        auto_configure_fail(
+            "\n".join([
+                error_msg.strip() if error_msg else "Repository command failed",
+                result.stderr.strip(),
+                error_details if error_details else "",
+            ]),
+        )
+    return result
+
+def _norm_path(path):
+    """Returns a path with '/' and remove the trailing slash."""
+    path = path.replace("\\", "/")
+    if path[-1] == "/":
+        path = path[:-1]
+    return path
+
+def _symlink_genrule_for_dir(
+        repository_ctx,
+        src_dir,
+        dest_dir,
+        genrule_name,
+        src_files = [],
+        dest_files = []):
+    """Returns a genrule to symlink(or copy if on Windows) a set of files.
+
+    If src_dir is passed, files will be read from the given directory; otherwise
+    we assume files are in src_files and dest_files
+    """
+    if src_dir != None:
+        src_dir = _norm_path(src_dir)
+        dest_dir = _norm_path(dest_dir)
+        files = _read_dir(repository_ctx, src_dir)
+
+        # Create a list with the src_dir stripped to use for outputs.
+        dest_files = files.replace(src_dir, "").splitlines()
+        src_files = files.splitlines()
+    command = []
+
+    # We clear folders that might have been generated previously to avoid
+    # undesired inclusions
+    command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+    command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+    outs = []
+    for i in range(len(dest_files)):
+        if dest_files[i] != "":
+            # If we have only one file to link we do not want to use the dest_dir, as
+            # $(@D) will include the full path to the file.
+            dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
+
+            # On Windows, symlink is not supported, so we just copy all the files.
+            cmd = "ln -s"
+            command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+            outs.append('        "' + dest_dir + dest_files[i] + '",')
+    genrule = _genrule(
+        src_dir,
+        genrule_name,
+        " && ".join(command),
+        "\n".join(outs),
+    )
+    return genrule
+
+def _genrule(src_dir, genrule_name, command, outs):
+    """Returns a string with a genrule.
+
+    Genrule executes the given command and produces the given outputs.
+    """
+    return (
+        "genrule(\n" +
+        '    name = "' +
+        genrule_name + '",\n' +
+        "    outs = [\n" +
+        outs +
+        "\n    ],\n" +
+        '    cmd = """\n' +
+        command +
+        '\n   """,\n' +
+        ")\n"
+    )
+
+def _read_dir(repository_ctx, src_dir):
+    """Returns a string with all files in a directory.
+
+    Finds all files inside a directory, traversing subfolders and following
+    symlinks. The returned string contains the full path of all files
+    separated by line breaks.
+    """
+    find_result = _execute(
+        repository_ctx,
+        ["find", src_dir, "-follow", "-type", "f"],
+        empty_stdout_fine = True,
+    )
+    result = find_result.stdout
+    return result
+
+def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
+    if False:
+        amdgpu_target_flags = ["--amdgpu-target=" +
+                               amdgpu_target for amdgpu_target in amdgpu_targets]
+    else:
+        # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc"
+        amdgpu_target_flags = []
+    return str(amdgpu_target_flags)
+
+def _create_local_rocm_repository(repository_ctx):
+    """Creates the repository containing files set up to build with ROCm."""
+    rocm_config = _get_rocm_config(repository_ctx)
+
+    # Set up symbolic links for the rocm toolkit by creating genrules to do
+    # symlinking. We create one genrule for each directory we want to track under
+    # rocm_toolkit_path
+    rocm_toolkit_path = rocm_config.rocm_toolkit_path
+    rocm_include_path = rocm_toolkit_path + "/include"
+    genrules = [_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_include_path,
+        "rocm/include",
+        "rocm-include",
+    )]
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/rocfft/include",
+        "rocm/include/rocfft",
+        "rocfft-include",
+    ))
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/rocblas/include",
+        "rocm/include/rocblas",
+        "rocblas-include",
+    ))
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/miopen/include",
+        "rocm/include/miopen",
+        "miopen-include",
+    ))
+
+    rocm_libs = _find_libs(repository_ctx, rocm_config)
+    rocm_lib_src = []
+    rocm_lib_dest = []
+    for lib in rocm_libs.values():
+        rocm_lib_src.append(lib.path)
+        rocm_lib_dest.append("rocm/lib/" + lib.file_name)
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        None,
+        "",
+        "rocm-lib",
+        rocm_lib_src,
+        rocm_lib_dest,
+    ))
+
+    included_files = _read_dir(repository_ctx, rocm_include_path).replace(
+        rocm_include_path,
+        "",
+    ).splitlines()
+
+    # Set up BUILD file for rocm/
+    _tpl(
+        repository_ctx,
+        "rocm:build_defs.bzl",
+        {
+            "%{rocm_is_configured}": "True",
+            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+                repository_ctx,
+                rocm_config.amdgpu_targets,
+            ),
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "rocm:BUILD",
+        {
+            "%{hip_lib}": rocm_libs["hip"].file_name,
+            "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+            "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+            "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+            "%{miopen_lib}": rocm_libs["miopen"].file_name,
+            "%{rocm_include_genrules}": "\n".join(genrules),
+            "%{rocm_headers}": ('":rocm-include",\n' +
+                                '":rocfft-include",\n' +
+                                '":rocblas-include",\n' +
+                                '":miopen-include",'),
+        },
+    )
+
+    # Set up crosstool/
+    _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
+    cc = find_cc(repository_ctx)
+    host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+    rocm_defines = {
+        "%{rocm_include_path}": _rocm_include_path(
+            repository_ctx,
+            rocm_config,
+        ),
+        "%{host_compiler_includes}": host_compiler_includes,
+        "%{clang_path}": str(cc),
+    }
+
+    _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out = "crosstool/CROSSTOOL")
+
+    _tpl(
+        repository_ctx,
+        "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
+        {
+            "%{cpu_compiler}": str(cc),
+            "%{hipcc_path}": "/opt/rocm/bin/hipcc",
+            "%{gcc_host_compiler_path}": str(cc),
+            "%{rocm_amdgpu_targets}": ",".join(
+                ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
+            ),
+        },
+    )
+
+    # Set up rocm_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    _tpl(
+        repository_ctx,
+        "rocm:rocm_config.h",
+        {
+            "%{rocm_amdgpu_targets}": ",".join(
+                ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
+            ),
+            "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
+        },
+        "rocm/rocm/rocm_config.h",
+    )
+
+def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
+    """Creates pointers to a remotely configured repo set up to build with ROCm."""
+    _tpl(
+        repository_ctx,
+        "rocm:build_defs.bzl",
+        {
+            "%{rocm_is_configured}": "True",
+            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+                repository_ctx,  #_compute_capabilities(repository_ctx)
+            ),
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "rocm:remote.BUILD",
+        {
+            "%{remote_rocm_repo}": remote_config_repo,
+        },
+        "rocm/BUILD",
+    )
+    _tpl(repository_ctx, "crosstool:remote.BUILD", {
+        "%{remote_rocm_repo}": remote_config_repo,
+    }, "crosstool/BUILD")
+
+def _rocm_autoconf_impl(repository_ctx):
+    """Implementation of the rocm_autoconf repository rule."""
+    if not _enable_rocm(repository_ctx):
+        _create_dummy_repository(repository_ctx)
+    elif _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ:
+        _create_remote_rocm_repository(
+            repository_ctx,
+            repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO],
+        )
+    else:
+        _create_local_rocm_repository(repository_ctx)
+
+rocm_configure = repository_rule(
+    implementation = _rocm_autoconf_impl,
+    environ = [
+        _GCC_HOST_COMPILER_PATH,
+        "TF_NEED_ROCM",
+        _ROCM_TOOLKIT_PATH,
+        _TF_ROCM_VERSION,
+        _TF_MIOPEN_VERSION,
+        _TF_ROCM_AMDGPU_TARGETS,
+        _TF_ROCM_CONFIG_REPO,
+    ],
+)
+
+"""Detects and configures the local ROCm toolchain.
+
+Add the following to your WORKSPACE FILE:
+
+```python
+rocm_configure(name = "local_config_rocm")
+```
+
+Args:
+  name: A unique name for this workspace rule.
+"""
diff --git a/third_party/icu/BUILD b/third_party/icu/BUILD
new file mode 100644
index 0000000000..82bab3ffd9
--- /dev/null
+++ b/third_party/icu/BUILD
@@ -0,0 +1 @@
+# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/icu/BUILD.bazel b/third_party/icu/BUILD.bazel
new file mode 100644
index 0000000000..36d6b9006b
--- /dev/null
+++ b/third_party/icu/BUILD.bazel
@@ -0,0 +1,88 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files([
+    "icu4c/LICENSE",
+    "icu4j/main/shared/licenses/LICENSE",
+])
+
+cc_library(
+    name = "headers",
+    hdrs = glob(["icu4c/source/common/unicode/*.h"]),
+    includes = [
+        "icu4c/source/common",
+    ],
+    deps = [
+    ],
+)
+
+cc_library(
+    name = "common",
+    hdrs = glob(["icu4c/source/common/unicode/*.h"]),
+    includes = [
+        "icu4c/source/common",
+    ],
+    deps = [
+        ":icuuc",
+    ],
+)
+
+cc_library(
+    name = "icuuc",
+    srcs = glob(
+        [
+            "icu4c/source/common/*.c",
+            "icu4c/source/common/*.cpp",
+            "icu4c/source/stubdata/*.cpp",
+        ],
+    ),
+    hdrs = glob([
+        "icu4c/source/common/*.h",
+    ]),
+    copts = [
+        "-DU_COMMON_IMPLEMENTATION",
+        "-DU_HAVE_STD_ATOMICS",
+    ] + select({
+        ":android": [
+            "-fdata-sections",
+            "-DGOOGLE_VENDOR_SRC_BRANCH",
+            "-DU_HAVE_NL_LANGINFO_CODESET=0",
+            "-Wno-deprecated-declarations",
+        ],
+        ":apple": [
+            "-DGOOGLE_VENDOR_SRC_BRANCH",
+            "-Wno-shorten-64-to-32",
+            "-Wno-unused-variable",
+        ],
+        ":windows": [
+            "/utf-8",
+            "/DLOCALE_ALLOW_NEUTRAL_NAMES=0",
+        ],
+        "//conditions:default": [],
+    }),
+    tags = ["requires-rtti"],
+    visibility = [
+        "//visibility:private",
+    ],
+    deps = [
+        ":headers",
+    ],
+)
+
+config_setting(
+    name = "android",
+    values = {"crosstool_top": "//external:android/crosstool"},
+)
+
+config_setting(
+    name = "apple",
+    values = {"cpu": "darwin"},
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows"},
+)
diff --git a/third_party/icu/workspace.bzl b/third_party/icu/workspace.bzl
new file mode 100644
index 0000000000..bfebf4219b
--- /dev/null
+++ b/third_party/icu/workspace.bzl
@@ -0,0 +1,15 @@
+"""Loads a lightweight subset of the ICU library for Unicode processing."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "icu",
+        strip_prefix = "icu-release-62-1",
+        sha256 = "e15ffd84606323cbad5515bf9ecdf8061cc3bf80fb883b9e6aa162e485aa9761",
+        urls = [
+            "https://mirror.bazel.build/github.com/unicode-org/icu/archive/release-62-1.tar.gz",
+            "https://github.com/unicode-org/icu/archive/release-62-1.tar.gz",
+        ],
+        build_file = "//third_party/icu:BUILD.bazel",
+    )
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
deleted file mode 100644
index 1b0829b8fe..0000000000
--- a/third_party/jemalloc.BUILD
+++ /dev/null
@@ -1,356 +0,0 @@
-# Description:
-# jemalloc - a general-purpose scalable concurrent malloc implementation
-
-licenses(["notice"])  # BSD
-
-exports_files(["COPYING"])
-
-load("@org_tensorflow//third_party:common.bzl", "template_rule")
-
-cc_library(
-    name = "jemalloc_headers",
-    hdrs = ["include/jemalloc/jemalloc.h"],
-    includes = ["include"],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    srcs = [
-        "src/arena.c",
-        "src/atomic.c",
-        "src/base.c",
-        "src/bitmap.c",
-        "src/chunk.c",
-        "src/chunk_dss.c",
-        "src/chunk_mmap.c",
-        "src/ckh.c",
-        "src/ctl.c",
-        "src/extent.c",
-        "src/hash.c",
-        "src/huge.c",
-        "src/jemalloc.c",
-        "src/mb.c",
-        "src/mutex.c",
-        "src/nstime.c",
-        "src/pages.c",
-        "src/prng.c",
-        "src/prof.c",
-        "src/quarantine.c",
-        "src/rtree.c",
-        "src/spin.c",
-        "src/stats.c",
-        "src/tcache.c",
-        "src/tsd.c",
-        "src/util.c",
-        "src/witness.c",
-    ],
-    hdrs = [
-        "include/jemalloc/internal/arena.h",
-        "include/jemalloc/internal/assert.h",
-        "include/jemalloc/internal/atomic.h",
-        "include/jemalloc/internal/base.h",
-        "include/jemalloc/internal/bitmap.h",
-        "include/jemalloc/internal/chunk.h",
-        "include/jemalloc/internal/chunk_dss.h",
-        "include/jemalloc/internal/chunk_mmap.h",
-        "include/jemalloc/internal/ckh.h",
-        "include/jemalloc/internal/ctl.h",
-        "include/jemalloc/internal/extent.h",
-        "include/jemalloc/internal/hash.h",
-        "include/jemalloc/internal/huge.h",
-        "include/jemalloc/internal/jemalloc_internal.h",
-        "include/jemalloc/internal/jemalloc_internal_decls.h",
-        "include/jemalloc/internal/jemalloc_internal_defs.h",
-        "include/jemalloc/internal/jemalloc_internal_macros.h",
-        "include/jemalloc/internal/mb.h",
-        "include/jemalloc/internal/mutex.h",
-        "include/jemalloc/internal/nstime.h",
-        "include/jemalloc/internal/pages.h",
-        "include/jemalloc/internal/ph.h",
-        "include/jemalloc/internal/private_namespace.h",
-        "include/jemalloc/internal/prng.h",
-        "include/jemalloc/internal/prof.h",
-        "include/jemalloc/internal/ql.h",
-        "include/jemalloc/internal/qr.h",
-        "include/jemalloc/internal/quarantine.h",
-        "include/jemalloc/internal/rb.h",
-        "include/jemalloc/internal/rtree.h",
-        "include/jemalloc/internal/size_classes.h",
-        "include/jemalloc/internal/smoothstep.h",
-        "include/jemalloc/internal/spin.h",
-        "include/jemalloc/internal/stats.h",
-        "include/jemalloc/internal/tcache.h",
-        "include/jemalloc/internal/ticker.h",
-        "include/jemalloc/internal/tsd.h",
-        "include/jemalloc/internal/util.h",
-        "include/jemalloc/internal/valgrind.h",
-        "include/jemalloc/internal/witness.h",
-    ],
-    # Same flags that jemalloc uses to build.
-    copts = [
-        "-O3",
-        "-funroll-loops",
-        "-D_GNU_SOURCE",
-        "-D_REENTRANT",
-    ],
-    includes = ["include"],
-    # pthread_atfork() is called for PPC.
-    linkopts = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": [
-            "-lpthread",
-        ],
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "-lpthread",
-        ],
-        "//conditions:default": [
-        ],
-    }),
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
-
-sh_binary(
-    name = "jemalloc_sh",
-    srcs = ["include/jemalloc/jemalloc.sh"],
-)
-
-genrule(
-    name = "jemalloc_h",
-    srcs = [
-        ":jemalloc_defs_h",
-        ":jemalloc_macros_h",
-        ":jemalloc_mangle_h",
-        ":jemalloc_protos_h",
-        ":jemalloc_rename_h",
-        ":jemalloc_typedefs_h",
-    ],
-    outs = ["include/jemalloc/jemalloc.h"],
-    cmd = "$(location :jemalloc_sh) $$(dirname $(location :jemalloc_defs_h))/../../ >$@",
-    tools = [":jemalloc_sh"],
-)
-
-# Add to this list if you want to export more symbols from jemalloc.
-genrule(
-    name = "public_symbols_txt",
-    outs = ["include/jemalloc/internal/public_symbols.txt"],
-    cmd = "\n".join([
-        "cat <<'EOF' > $@",
-        "free:jemalloc_free",
-        "malloc:jemalloc_malloc",
-        "posix_memalign:jemalloc_posix_memalign",
-        "realloc:jemalloc_realloc",
-        "EOF",
-    ]),
-)
-
-sh_binary(
-    name = "jemalloc_mangle_sh",
-    srcs = ["include/jemalloc/jemalloc_mangle.sh"],
-)
-
-genrule(
-    name = "jemalloc_mangle_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_mangle.h"],
-    cmd = "$(location :jemalloc_mangle_sh) $(location :public_symbols_txt) je_ >$@",
-    tools = [":jemalloc_mangle_sh"],
-)
-
-sh_binary(
-    name = "jemalloc_rename_sh",
-    srcs = ["include/jemalloc/jemalloc_rename.sh"],
-)
-
-genrule(
-    name = "jemalloc_rename_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_rename.h"],
-    cmd = "$(location :jemalloc_rename_sh) $(location :public_symbols_txt) >$@",
-    tools = [":jemalloc_rename_sh"],
-)
-
-sh_binary(
-    name = "private_namespace_sh",
-    srcs = ["include/jemalloc/internal/private_namespace.sh"],
-)
-
-genrule(
-    name = "private_namespace_h",
-    srcs = ["include/jemalloc/internal/private_symbols.txt"],
-    outs = ["include/jemalloc/internal/private_namespace.h"],
-    cmd = "$(location :private_namespace_sh) $(location include/jemalloc/internal/private_symbols.txt) >$@",
-    tools = [":private_namespace_sh"],
-)
-
-sh_binary(
-    name = "public_namespace_sh",
-    srcs = ["include/jemalloc/internal/public_namespace.sh"],
-)
-
-genrule(
-    name = "public_namespace_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/internal/public_namespace.h"],
-    cmd = "$(location :public_namespace_sh) $(location :public_symbols_txt) >$@",
-    tools = [":public_namespace_sh"],
-)
-
-sh_binary(
-    name = "size_classes_sh",
-    srcs = ["include/jemalloc/internal/size_classes.sh"],
-)
-
-# Size classes for Linux x86_64 and ppc64le. Update if adding builds for other
-# architectures. See size_classes.sh for details on the arguments.
-# For default case, kept the arguments same as that of  x86_64 for now.
-genrule(
-    name = "size_classes_h",
-    outs = ["include/jemalloc/internal/size_classes.h"],
-    cmd = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
-        "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-        "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-    }),
-    tools = [":size_classes_sh"],
-)
-
-template_rule(
-    name = "jemalloc_internal_h",
-    src = "include/jemalloc/internal/jemalloc_internal.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal.h",
-    substitutions = {
-        "@private_namespace@": "je_",
-        "@install_suffix@": "",
-    },
-)
-
-template_rule(
-    name = "jemalloc_internal_defs_h",
-    src = "include/jemalloc/internal/jemalloc_internal_defs.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_PREFIX": "#define JEMALLOC_PREFIX \"jemalloc_\"",
-        "#undef JEMALLOC_CPREFIX": "#define JEMALLOC_CPREFIX \"JEMALLOC_\"",
-        "#undef JEMALLOC_PRIVATE_NAMESPACE": "#define JEMALLOC_PRIVATE_NAMESPACE je_",
-        "#undef CPU_SPINWAIT": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define CPU_SPINWAIT __asm__ volatile(\"or 27,27,27\")",
-            "#else",
-            "#define CPU_SPINWAIT __asm__ volatile(\"pause\")",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_HAVE_BUILTIN_CLZ": "#define JEMALLOC_HAVE_BUILTIN_CLZ",
-        "#undef JEMALLOC_USE_SYSCALL": "#define JEMALLOC_USE_SYSCALL",
-        "#undef JEMALLOC_HAVE_SECURE_GETENV": "#define JEMALLOC_HAVE_SECURE_GETENV",
-        "#undef JEMALLOC_HAVE_PTHREAD_ATFORK": "#define JEMALLOC_HAVE_PTHREAD_ATFORK",
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC\n": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1\n",
-        "#undef JEMALLOC_THREADED_INIT": "#define JEMALLOC_THREADED_INIT",
-        "#undef JEMALLOC_TLS_MODEL": "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))",
-        "#undef JEMALLOC_CC_SILENCE": "#define JEMALLOC_CC_SILENCE",
-        "#undef JEMALLOC_STATS": "#define JEMALLOC_STATS",
-        "#undef JEMALLOC_TCACHE": "#define JEMALLOC_TCACHE",
-        "#undef JEMALLOC_DSS": "#define JEMALLOC_DSS",
-        "#undef JEMALLOC_FILL": "#define JEMALLOC_FILL",
-        "#undef LG_TINY_MIN": "#define LG_TINY_MIN 3",
-        "#undef LG_PAGE": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define LG_PAGE 16",
-            "#else",
-            "#define LG_PAGE 12",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_MAPS_COALESCE": "#define JEMALLOC_MAPS_COALESCE",
-        "#undef JEMALLOC_TLS": "#define JEMALLOC_TLS",
-        "#undef JEMALLOC_INTERNAL_UNREACHABLE": "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable",
-        "#undef JEMALLOC_INTERNAL_FFSLL": "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_INTERNAL_FFSL\n": "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl\n",
-        "#undef JEMALLOC_INTERNAL_FFS\n": "#define JEMALLOC_INTERNAL_FFS __builtin_ffs\n",
-        "#undef JEMALLOC_CACHE_OBLIVIOUS": "#define JEMALLOC_CACHE_OBLIVIOUS",
-        "#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY": "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY",
-        "#undef JEMALLOC_HAVE_MADVISE": "#define JEMALLOC_HAVE_MADVISE",
-        "#undef JEMALLOC_PURGE_MADVISE_DONTNEED": "#define JEMALLOC_PURGE_MADVISE_DONTNEED",
-        "#undef JEMALLOC_THP": "#define JEMALLOC_THP",
-        "#undef JEMALLOC_HAS_ALLOCA_H": "#define JEMALLOC_HAS_ALLOCA_H 1",
-        # Newline required because of substitution conflicts.
-        "#undef LG_SIZEOF_INT\n": "#define LG_SIZEOF_INT 2\n",
-        "#undef LG_SIZEOF_LONG\n": "#define LG_SIZEOF_LONG 3\n",
-        "#undef LG_SIZEOF_LONG_LONG": "#define LG_SIZEOF_LONG_LONG 3",
-        "#undef LG_SIZEOF_INTMAX_T": "#define LG_SIZEOF_INTMAX_T 3",
-        "#undef JEMALLOC_GLIBC_MALLOC_HOOK": "#define JEMALLOC_GLIBC_MALLOC_HOOK",
-        "#undef JEMALLOC_GLIBC_MEMALIGN_HOOK": "#define JEMALLOC_GLIBC_MEMALIGN_HOOK",
-        "#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP": "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP",
-        "#undef JEMALLOC_CONFIG_MALLOC_CONF": "#define JEMALLOC_CONFIG_MALLOC_CONF \"\"",
-    },
-)
-
-template_rule(
-    name = "jemalloc_defs_h",
-    src = "include/jemalloc/jemalloc_defs.h.in",
-    out = "include/jemalloc/jemalloc_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_HAVE_ATTR": "#define JEMALLOC_HAVE_ATTR",
-        "#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE": "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF",
-        "#undef JEMALLOC_OVERRIDE_MEMALIGN": "#define JEMALLOC_OVERRIDE_MEMALIGN",
-        "#undef JEMALLOC_OVERRIDE_VALLOC": "#define JEMALLOC_OVERRIDE_VALLOC",
-        "#undef JEMALLOC_USABLE_SIZE_CONST": "#define JEMALLOC_USABLE_SIZE_CONST",
-        "#undef JEMALLOC_USE_CXX_THROW": "#define JEMALLOC_USE_CXX_THROW",
-        "#undef LG_SIZEOF_PTR": "#define LG_SIZEOF_PTR 3",
-    },
-)
-
-template_rule(
-    name = "jemalloc_macros_h",
-    src = "include/jemalloc/jemalloc_macros.h.in",
-    out = "include/jemalloc/jemalloc_macros.h",
-    substitutions = {
-        "@jemalloc_version@": "0.0.0",
-        "@jemalloc_version_major@": "0",
-        "@jemalloc_version_minor@": "0",
-        "@jemalloc_version_bugfix@": "0",
-        "@jemalloc_version_nrev@": "0",
-        "@jemalloc_version_gid@": "0000000000000000000000000000000000000000",
-    },
-)
-
-template_rule(
-    name = "jemalloc_protos_h",
-    src = "include/jemalloc/jemalloc_protos.h.in",
-    out = "include/jemalloc/jemalloc_protos.h",
-    substitutions = {
-        "@aligned_alloc": "aligned_alloc",
-        "@calloc": "calloc",
-        "@cbopaque": "cbopaque",
-        "@dallocx": "dallocx",
-        "@free": "free",
-        "@je": "je",
-        "@mallctl": "mallctl",
-        "@mallctlnametomib": "mallctlnametomib",
-        "@mallctlbymib": "mallctlbymib",
-        "@malloc_stats_print": "malloc_stats_print",
-        "@malloc_usable_size": "malloc_usable_size",
-        "@malloc": "malloc",
-        "@mallocx": "mallocx",
-        "@memalign": "memalign",
-        "@nallocx": "nallocx",
-        "@posix_memalign": "posix_memalign",
-        "@rallocx": "rallocx",
-        "@realloc": "realloc",
-        "@sallocx": "sallocx",
-        "@sdallocx": "sdallocx",
-        "@valloc": "valloc",
-        "@xallocx": "xallocx",
-    },
-)
-
-template_rule(
-    name = "jemalloc_typedefs_h",
-    src = "include/jemalloc/jemalloc_typedefs.h.in",
-    out = "include/jemalloc/jemalloc_typedefs.h",
-    substitutions = {},
-)
diff --git a/third_party/jpeg/BUILD b/third_party/jpeg/BUILD
index 5b01f6e3e4..e3aec1fce9 100644
--- a/third_party/jpeg/BUILD
+++ b/third_party/jpeg/BUILD
@@ -1 +1 @@
-licenses(["notice"])
+# Needed to make this a package.
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/BUILD.bazel
index 1b9b9bf2f5..5243e995a3 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/BUILD.bazel
@@ -162,9 +162,9 @@ cc_library(
     hdrs = [
         "simd/powerpc/jccolext-altivec.c",
         "simd/powerpc/jcgryext-altivec.c",
+        "simd/powerpc/jcsample.h",
         "simd/powerpc/jdcolext-altivec.c",
         "simd/powerpc/jdmrgext-altivec.c",
-        "simd/powerpc/jcsample.h",
         "simd/powerpc/jsimd_altivec.h",
     ],
     copts = libjpegturbo_copts,
@@ -186,7 +186,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.o",
         "simd/x86_64/jccolor-sse2.o",
         "simd/x86_64/jcgray-avx2.o",
@@ -213,6 +212,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.o",
         "simd/x86_64/jquanti-avx2.o",
         "simd/x86_64/jquanti-sse2.o",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.o",
     ],
     copts = libjpegturbo_copts,
@@ -322,9 +322,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm/jsimd.c",
         "simd/arm/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -343,9 +343,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm64/jsimd.c",
         "simd/arm64/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -366,7 +366,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.obj",
         "simd/x86_64/jccolor-sse2.obj",
         "simd/x86_64/jcgray-avx2.obj",
@@ -393,6 +392,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.obj",
         "simd/x86_64/jquanti-avx2.obj",
         "simd/x86_64/jquanti-sse2.obj",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.obj",
     ],
     copts = libjpegturbo_copts,
@@ -603,6 +603,7 @@ JCONFIGINT_WIN_SUBSTITUTIONS = {
 }
 
 JCONFIGINT_NOWIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
+
 JCONFIGINT_WIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
 
 template_rule(
diff --git a/third_party/systemlibs/jpeg.BUILD b/third_party/jpeg/BUILD.system
index f4f52da9bd..f4f52da9bd 100644
--- a/third_party/systemlibs/jpeg.BUILD
+++ b/third_party/jpeg/BUILD.system
diff --git a/third_party/jpeg/jpeg_helpers.BUILD.bazel b/third_party/jpeg/jpeg_helpers.BUILD.bazel
new file mode 100644
index 0000000000..5b01f6e3e4
--- /dev/null
+++ b/third_party/jpeg/jpeg_helpers.BUILD.bazel
@@ -0,0 +1 @@
+licenses(["notice"])
diff --git a/third_party/jpeg/workspace.bzl b/third_party/jpeg/workspace.bzl
new file mode 100644
index 0000000000..2bb7dacd32
--- /dev/null
+++ b/third_party/jpeg/workspace.bzl
@@ -0,0 +1,16 @@
+"""loads the jpeg library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "jpeg",
+        urls = [
+            "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+        ],
+        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
+        strip_prefix = "libjpeg-turbo-2.0.0",
+        build_file = "//third_party/jpeg:BUILD.bazel",
+        system_build_file = "//third_party/jpeg:BUILD.system",
+    )
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index efff7fd51b..15a3e5cfa7 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -1,26 +1,26 @@
 licenses(["notice"])  # 3-Clause BSD
 
 config_setting(
-    name = "using_mkl",
+    name = "build_with_mkl",
     define_values = {
-        "using_mkl": "true",
+        "build_with_mkl": "true",
     },
     visibility = ["//visibility:public"],
 )
 
 config_setting(
-    name = "using_mkl_ml_only",
+    name = "build_with_mkl_ml_only",
     define_values = {
-        "using_mkl": "true",
-        "using_mkl_ml_only": "true",
+        "build_with_mkl": "true",
+        "build_with_mkl_ml_only": "true",
     },
     visibility = ["//visibility:public"],
 )
 
 config_setting(
-    name = "using_mkl_lnx_x64",
+    name = "build_with_mkl_lnx_x64",
     define_values = {
-        "using_mkl": "true",
+        "build_with_mkl": "true",
     },
     values = {
         "cpu": "k8",
@@ -28,6 +28,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "enable_mkl",
+    define_values = {
+        "enable_mkl": "true",
+        "build_with_mkl": "true",
+    },
+    visibility = ["//visibility:public"],
+)
+
 load(
     "//third_party/mkl:build_defs.bzl",
     "if_mkl",
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index b645c0fc5c..10c2d90c84 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -1,9 +1,11 @@
 # -*- Python -*-
 """Skylark macros for MKL.
-if_mkl is a conditional to check if MKL is enabled or not.
-if_mkl_ml is a conditional to check if MKL-ML is enabled.
+
+if_mkl is a conditional to check if we are building with MKL.
+if_mkl_ml is a conditional to check if we are building with MKL-ML.
 if_mkl_ml_only is a conditional to check for MKL-ML-only (no MKL-DNN) mode.
 if_mkl_lnx_x64 is a conditional to check for MKL
+if_enable_mkl is a conditional to check if building with MKL and MKL is enabled.
 
 mkl_repository is a repository rule for creating MKL repository rule that can
 be pointed to either a local folder, or download it from the internet.
@@ -24,7 +26,7 @@ def if_mkl(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
@@ -40,8 +42,8 @@ def if_mkl_ml(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_false,
-        str(Label("//third_party/mkl:using_mkl")): if_true,
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_false,
+        str(Label("//third_party/mkl:build_with_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
@@ -56,12 +58,12 @@ def if_mkl_ml_only(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl_ml_only")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl_ml_only")): if_true,
         "//conditions:default": if_false,
     })
 
 def if_mkl_lnx_x64(if_true, if_false = []):
-    """Shorthand to select() on if MKL is on and the target is Linux x86-64.
+    """Shorthand to select() if building with MKL and the target is Linux x86-64.
 
     Args:
       if_true: expression to evaluate if building with MKL is enabled and the
@@ -73,7 +75,24 @@ def if_mkl_lnx_x64(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true,
+        str(Label("//third_party/mkl:build_with_mkl_lnx_x64")): if_true,
+        "//conditions:default": if_false,
+    })
+
+def if_enable_mkl(if_true, if_false = []):
+    """Shorthand to select() if we are building with MKL and MKL is enabled.
+
+    This is only effective when built with MKL.
+
+    Args:
+      if_true: expression to evaluate if building with MKL and MKL is enabled
+      if_false: expression to evaluate if building without MKL or MKL is not enabled.
+
+    Returns:
+      A select evaluating to either if_true or if_false as appropriate.
+    """
+    return select({
+        str(Label("//third_party/mkl:enable_mkl")): if_true,
         "//conditions:default": if_false,
     })
 
@@ -87,9 +106,9 @@ def mkl_deps():
       inclusion in the deps attribute of rules.
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): ["@mkl_dnn"],
-        str(Label("//third_party/mkl:using_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"],
-        str(Label("//third_party/mkl:using_mkl")): [
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): ["@mkl_dnn"],
+        str(Label("//third_party/mkl:build_with_mkl_ml_only")): ["//third_party/mkl:intel_binary_blob"],
+        str(Label("//third_party/mkl:build_with_mkl")): [
             "//third_party/mkl:intel_binary_blob",
             "@mkl_dnn",
         ],
diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD
index 3e567fa9fc..58ecda55e6 100644
--- a/third_party/mkl_dnn/BUILD
+++ b/third_party/mkl_dnn/BUILD
@@ -3,10 +3,10 @@ licenses(["notice"])
 exports_files(["LICENSE"])
 
 config_setting(
-    name = "using_mkl_dnn_only",
+    name = "build_with_mkl_dnn_only",
     define_values = {
-        "using_mkl": "true",
-        "using_mkl_dnn_only": "true",
+        "build_with_mkl": "true",
+        "build_with_mkl_dnn_only": "true",
     },
     visibility = ["//visibility:public"],
 )
diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl
index 7ce2a7d9b0..6388f31971 100644
--- a/third_party/mkl_dnn/build_defs.bzl
+++ b/third_party/mkl_dnn/build_defs.bzl
@@ -8,6 +8,6 @@ def if_mkl_open_source_only(if_true, if_false = []):
 
     """
     return select({
-        str(Label("//third_party/mkl_dnn:using_mkl_dnn_only")): if_true,
+        str(Label("//third_party/mkl_dnn:build_with_mkl_dnn_only")): if_true,
         "//conditions:default": if_false,
     })
diff --git a/third_party/nasm/BUILD b/third_party/nasm/BUILD
new file mode 100644
index 0000000000..e3aec1fce9
--- /dev/null
+++ b/third_party/nasm/BUILD
@@ -0,0 +1 @@
+# Needed to make this a package.
diff --git a/third_party/nasm.BUILD b/third_party/nasm/BUILD.bazel
index d746a65e7e..c68d713946 100644
--- a/third_party/nasm.BUILD
+++ b/third_party/nasm/BUILD.bazel
@@ -137,12 +137,6 @@ cc_binary(
         ":windows": ["config/msvc.h"],
         "//conditions:default": [],
     }),
-    includes = [
-        "asm",
-        "include",
-        "output",
-        "x86",
-    ],
     copts = select({
         ":windows": [],
         "//conditions:default": [
@@ -157,6 +151,12 @@ cc_binary(
             "HAVE_SYS_TYPES_H",
         ],
     }),
+    includes = [
+        "asm",
+        "include",
+        "output",
+        "x86",
+    ],
     visibility = ["@jpeg//:__pkg__"],
 )
 
diff --git a/third_party/systemlibs/nasm.BUILD b/third_party/nasm/BUILD.system
index 10ef8d8832..10ef8d8832 100644
--- a/third_party/systemlibs/nasm.BUILD
+++ b/third_party/nasm/BUILD.system
diff --git a/third_party/nasm/workspace.bzl b/third_party/nasm/workspace.bzl
new file mode 100644
index 0000000000..6d50f6fcad
--- /dev/null
+++ b/third_party/nasm/workspace.bzl
@@ -0,0 +1,17 @@
+"""loads the nasm library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "nasm",
+        urls = [
+            "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+            "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
+            "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+        ],
+        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
+        strip_prefix = "nasm-2.13.03",
+        build_file = "//third_party/nasm:BUILD.bazel",
+        system_build_file = "//third_party/nasm:BUILD.system",
+    )
diff --git a/third_party/nccl/LICENSE b/third_party/nccl/LICENSE
index 146d9b765c..b958518186 100644
--- a/third_party/nccl/LICENSE
+++ b/third_party/nccl/LICENSE
@@ -1,203 +1,30 @@
-Copyright 2018 The TensorFlow Authors.  All rights reserved.
 
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2018, The TensorFlow Authors.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+ Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+  * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
+    Laboratory, the U.S. Department of Energy, nor the names of their
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ The U.S. Department of Energy funded the development of this software
+ under subcontract 7078610 with Lawrence Berkeley National Laboratory.
diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD
new file mode 100644
index 0000000000..f57f04c75e
--- /dev/null
+++ b/third_party/nccl/archive.BUILD
@@ -0,0 +1,179 @@
+# NVIDIA NCCL 2
+# A package of optimized primitives for collective multi-GPU communication.
+
+licenses(["restricted"])
+
+exports_files(["LICENSE.txt"])
+
+load(
+    "@local_config_nccl//:build_defs.bzl",
+    "device_link",
+    "gen_nccl_h",
+    "nccl_library",
+    "rdc_copts",
+)
+load(
+    "@local_config_cuda//cuda:build_defs.bzl",
+    "cuda_default_copts",
+)
+
+# Generate the nccl.h header file.
+gen_nccl_h(
+    name = "nccl_h",
+    output = "src/nccl.h",
+    template = "src/nccl.h.in",
+)
+
+nccl_library(
+    name = "src_hdrs",
+    hdrs = [
+        "src/nccl.h",
+        # src/include/common_coll.h #includes "collectives/collectives.h".
+        # All other #includes of collectives.h are patched in process_srcs.
+        "src/collectives/collectives.h",
+    ],
+    strip_include_prefix = "src",
+)
+
+nccl_library(
+    name = "include_hdrs",
+    hdrs = glob(["src/include/*.h"]),
+    strip_include_prefix = "src/include",
+)
+
+filegroup(
+    name = "device_hdrs",
+    srcs = glob(["src/collectives/device/*.h"]),
+)
+
+filegroup(
+    name = "device_srcs",
+    srcs = [
+        "src/collectives/device/all_gather.cu",
+        "src/collectives/device/all_reduce.cu",
+        "src/collectives/device/broadcast.cu",
+        "src/collectives/device/reduce.cu",
+        "src/collectives/device/reduce_scatter.cu",
+    ],
+)
+
+nccl_library(
+    name = "sum",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=0"] + rdc_copts(),
+    prefix = "sum_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "prod",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=1"] + rdc_copts(),
+    prefix = "_prod",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "min",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=2"] + rdc_copts(),
+    prefix = "min_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "max",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=3"] + rdc_copts(),
+    prefix = "max_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "functions",
+    srcs = [
+        ":device_hdrs",
+        "src/collectives/device/functions.cu",
+    ],
+    copts = rdc_copts(),
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+device_link(
+    name = "device_code",
+    srcs = [
+        ":functions",
+        ":max",
+        ":min",
+        ":prod",
+        ":sum",
+    ],
+)
+
+# Primary NCCL target.
+nccl_library(
+    name = "nccl",
+    srcs = glob(
+        include = ["src/**/*.cu"],
+        # Exclude device-library code.
+        exclude = ["src/collectives/device/**"],
+    ) + [
+        # Required for header inclusion checking (see
+        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
+        # Files in src/ which #include "nccl.h" load it from there rather than
+        # from the virtual includes directory.
+        "src/nccl.h",
+    ],
+    hdrs = ["src/nccl.h"],
+    include_prefix = "third_party/nccl",
+    strip_include_prefix = "src",
+    copts = cuda_default_copts(),
+    deps = [
+        ":device_code",
+        ":functions",
+        ":include_hdrs",
+        ":max",
+        ":min",
+        ":prod",
+        ":src_hdrs",
+        ":sum",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
new file mode 100644
index 0000000000..ede1d3dad5
--- /dev/null
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -0,0 +1,210 @@
+"""Repository rule for NCCL."""
+
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
+
+def _gen_nccl_h_impl(ctx):
+    """Creates nccl.h from a template."""
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "${nccl:Major}": "2",
+            "${nccl:Minor}": "3",
+            "${nccl:Patch}": "5",
+            "${nccl:Suffix}": "",
+            "${nccl:Version}": "2305",
+        },
+    )
+gen_nccl_h = rule(
+    implementation = _gen_nccl_h_impl,
+    attrs = {
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Creates the NCCL header file."""
+
+
+def _process_srcs_impl(ctx):
+    """Appends .cc to .cu files, patches include directives."""
+    files = []
+    for src in ctx.files.srcs:
+        if not src.is_source:
+          # Process only once, specifically "src/nccl.h".
+          files.append(src)
+          continue
+        name = src.basename
+        if src.extension == "cu":
+            name = ctx.attr.prefix + name + ".cc"
+        file = ctx.actions.declare_file(name, sibling = src)
+        ctx.actions.expand_template(
+            output = file,
+            template = src,
+            substitutions = {
+                "\"collectives.h": "\"collectives/collectives.h",
+                "\"../collectives.h": "\"collectives/collectives.h",
+                "#if __CUDACC_VER_MAJOR__":
+                    "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__",
+                # Substitutions are applied in order.
+                "std::nullptr_t": "nullptr_t",
+                "nullptr_t": "std::nullptr_t",
+            },
+        )
+        files.append(file)
+    return [DefaultInfo(files = depset(files))]
+_process_srcs = rule(
+    implementation = _process_srcs_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "prefix": attr.string(default = ""),
+    },
+)
+"""Processes the NCCL srcs so they can be compiled with bazel and clang."""
+
+
+def nccl_library(name, srcs=None, hdrs=None, prefix=None, **kwargs):
+    """Processes the srcs and hdrs and creates a cc_library."""
+
+    _process_srcs(
+        name = name + "_srcs",
+        srcs = srcs,
+        prefix = prefix,
+    )
+    _process_srcs(
+        name = name + "_hdrs",
+        srcs = hdrs,
+    )
+
+    native.cc_library(
+        name = name,
+        srcs = [name + "_srcs"] if srcs else [],
+        hdrs = [name + "_hdrs"] if hdrs else [],
+        **kwargs
+    )
+
+
+def rdc_copts():
+    """Returns copts for compiling relocatable device code."""
+
+    # The global functions can not have a lower register count than the
+    # device functions. This is enforced by setting a fixed register count.
+    # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
+    maxrregcount = "-maxrregcount=96"
+
+    return cuda_default_copts() + select({
+          "@local_config_cuda//cuda:using_nvcc": [
+              "-nvcc_options",
+              "relocatable-device-code=true",
+              "-nvcc_options",
+              "ptxas-options=" + maxrregcount,
+          ],
+          "@local_config_cuda//cuda:using_clang": [
+              "-fcuda-rdc",
+              "-Xcuda-ptxas",
+              maxrregcount,
+          ],
+          "//conditions:default": [],
+      }) + ["-fvisibility=hidden"]
+
+
+def _filter_impl(ctx):
+    suffix = ctx.attr.suffix
+    files = [src for src in ctx.files.srcs if src.path.endswith(suffix)]
+    return [DefaultInfo(files = depset(files))]
+_filter = rule(
+    implementation = _filter_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "suffix": attr.string(),
+    },
+)
+"""Filters the srcs to the ones ending with suffix."""
+
+
+def _gen_link_src_impl(ctx):
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path,
+            "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path,
+        },
+    )
+_gen_link_src = rule(
+    implementation = _gen_link_src_impl,
+    attrs = {
+        "register_hdr": attr.label(allow_single_file = True),
+        "fatbin_hdr": attr.label(allow_single_file = True),
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Patches the include directives for the link.stub file."""
+
+
+def device_link(name, srcs):
+    """Links seperately compiled relocatable device code into a cc_library."""
+
+    # From .a and .pic.a archives, just use the latter.
+    _filter(
+        name = name + "_pic_a",
+        srcs = srcs,
+        suffix = ".pic.a",
+    )
+
+    # Device-link to cubins for each architecture.
+    images = []
+    cubins = []
+    for arch in %{gpu_architectures}:
+        cubin = "%s_%s.cubin" % (name, arch)
+        register_hdr = "%s_%s.h" % (name, arch)
+        nvlink = "@local_config_nccl//:nvlink"
+        cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink +
+            "--arch=%s $(SRCS) " % arch +
+            "--register-link-binaries=$(location %s) " % register_hdr +
+            "--output-file=$(location %s)" % cubin)
+        native.genrule(
+            name = "%s_%s" % (name, arch),
+            outs = [register_hdr, cubin],
+            srcs = [name + "_pic_a"],
+            cmd = cmd,
+            tools = [nvlink],
+        )
+        images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin))
+        cubins.append(cubin)
+
+    # Generate fatbin header from all cubins.
+    fatbin_hdr = name + ".fatbin.h"
+    fatbinary = "@local_config_nccl//:cuda/bin/fatbinary"
+    cmd = ("PATH=$$CUDA_TOOLKIT_PATH/bin:$$PATH " + # for bin2c
+          "$(location %s) -64 --cmdline=--compile-only --link " % fatbinary +
+          "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) +
+          "--embedded-fatbin=$@")
+    native.genrule(
+        name = name + "_fatbin_h",
+        outs = [fatbin_hdr],
+        srcs = cubins,
+        cmd = cmd,
+        tools = [fatbinary],
+    )
+
+    # Generate the source file #including the headers generated above.
+    _gen_link_src(
+        name = name + "_cc",
+        # Include just the last one, they are equivalent.
+        register_hdr = register_hdr,
+        fatbin_hdr = fatbin_hdr,
+        template = "@local_config_nccl//:cuda/bin/crt/link.stub",
+        output = name + ".cc",
+    )
+
+    # Compile the source file into the cc_library.
+    native.cc_library(
+        name = name,
+        srcs = [name + "_cc"],
+        textual_hdrs = [register_hdr, fatbin_hdr],
+        deps = [
+            "@local_config_cuda//cuda:cuda_headers",
+            "@local_config_cuda//cuda:cudart_static",
+        ],
+    )
diff --git a/third_party/nccl/nccl_archive.BUILD b/third_party/nccl/nccl_archive.BUILD
deleted file mode 100644
index a05899e38d..0000000000
--- a/third_party/nccl/nccl_archive.BUILD
+++ /dev/null
@@ -1,68 +0,0 @@
-# NVIDIA nccl
-# A package of optimized primitives for collective multi-GPU communication.
-
-licenses(["notice"])  # BSD
-
-exports_files(["LICENSE.txt"])
-
-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")
-
-SRCS = [
-    "src/all_gather.cu",
-    "src/all_reduce.cu",
-    "src/broadcast.cu",
-    "src/core.cu",
-    "src/libwrap.cu",
-    "src/reduce.cu",
-    "src/reduce_scatter.cu",
-]
-
-# Copy .cu to .cu.cc so they can be in srcs of cc_library.
-[
-    genrule(
-        name = "gen_" + src,
-        srcs = [src],
-        outs = [src + ".cc"],
-        cmd = "cp $(location " + src + ") $(location " + src + ".cc)",
-    )
-    for src in SRCS
-]
-
-SRCS_CU_CC = [src + ".cc" for src in SRCS]
-
-cc_library(
-    name = "nccl",
-    srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])),
-    hdrs = if_cuda(["src/nccl.h"]),
-    copts = [
-        "-DCUDA_MAJOR=0",
-        "-DCUDA_MINOR=0",
-        "-DNCCL_MAJOR=0",
-        "-DNCCL_MINOR=0",
-        "-DNCCL_PATCH=0",
-        "-Iexternal/nccl_archive/src",
-        "-O3",
-    ] + cuda_default_copts(),
-    include_prefix = "third_party/nccl",
-    linkopts = select({
-        "@org_tensorflow//tensorflow:android": [
-            "-pie",
-        ],
-        "@org_tensorflow//tensorflow:darwin": [
-            "-Wl,-framework",
-            "-Wl,CoreFoundation",
-            "-Wl,-framework",
-            "-Wl,Security",
-        ],
-        "@org_tensorflow//tensorflow:ios": [],
-        "@org_tensorflow//tensorflow:windows": [
-            "-DEFAULTLIB:ws2_32.lib",
-        ],
-        "//conditions:default": [
-            "-lrt",
-        ],
-    }),
-    strip_include_prefix = "src",
-    visibility = ["//visibility:public"],
-    deps = ["@local_config_cuda//cuda:cuda_headers"],
-)
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index ce9447096e..7f00df0962 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -5,16 +5,22 @@
 
   * `TF_NCCL_VERSION`: The NCCL version.
   * `NCCL_INSTALL_PATH`: The installation path of the NCCL library.
+  * `NCCL_HDR_PATH`: The installation path of the NCCL header files.
 """
 
 load(
     "//third_party/gpus:cuda_configure.bzl",
     "auto_configure_fail",
+    "compute_capabilities",
+    "cuda_toolkit_path",
     "find_cuda_define",
     "matches_version",
 )
 
+_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
+_NCCL_HDR_PATH = "NCCL_HDR_PATH"
 _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
 _TF_NCCL_VERSION = "TF_NCCL_VERSION"
 _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
 
@@ -35,6 +41,12 @@ cc_library(
 """
 
 _NCCL_ARCHIVE_BUILD_CONTENT = """
+exports_files([
+    "cuda/bin/crt/link.stub",
+    "cuda/bin/fatbinary",
+    "nvlink",
+])
+
 filegroup(
   name = "LICENSE",
   data = ["@nccl_archive//:LICENSE.txt"],
@@ -48,108 +60,125 @@ alias(
 )
 """
 
-# Local build results in dynamic link and the license should not be included.
-_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl")
-_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl")
+def _label(file):
+    return Label("//third_party/nccl:{}".format(file))
 
 def _find_nccl_header(repository_ctx, nccl_install_path):
-  """Finds the NCCL header on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
-
-  Returns:
-    The path to the NCCL header.
-  """
-  header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
-  if not header_path.exists:
-    auto_configure_fail("Cannot find %s" % str(header_path))
-  return header_path
-
-
-def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
-  """Checks whether the header file matches the specified version of NCCL.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
-    nccl_version: The expected NCCL version.
-
-  Returns:
-    A string containing the library version of NCCL.
-  """
-  header_path = _find_nccl_header(repository_ctx, nccl_install_path)
-  header_dir = str(header_path.realpath.dirname)
-  major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MAJOR)
-  minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MINOR)
-  patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_PATCH)
-  header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-  if not matches_version(nccl_version, header_version):
-    auto_configure_fail(
-        ("NCCL library version detected from %s/nccl.h (%s) does not match " +
-         "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
-        (header_dir, header_version, nccl_version))
-
-
-def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
-  """Finds the given NCCL library on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library installation directory.
-    nccl_version: The version of NCCL library files as returned
-      by _nccl_version.
-
-  Returns:
-    The path to the NCCL library.
-  """
-  lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
-                                                           nccl_version))
-  if not lib_path.exists:
-    auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
-  return lib_path
-
+    """Finds the NCCL header on the system.
+
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
+
+    Returns:
+      The path to the NCCL header.
+    """
+    header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
+    if not header_path.exists:
+        auto_configure_fail("Cannot find %s" % str(header_path))
+    return header_path
+
+def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
+    """Checks whether the header file matches the specified version of NCCL.
+
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
+      nccl_hdr_path: The NCCL header path.
+      nccl_version: The expected NCCL version.
+
+    Returns:
+      A string containing the library version of NCCL.
+    """
+    header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+    if not header_path.exists:
+        header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+    header_dir = str(header_path.realpath.dirname)
+    major_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MAJOR,
+    )
+    minor_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MINOR,
+    )
+    patch_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_PATCH,
+    )
+    header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+    if not matches_version(nccl_version, header_version):
+        auto_configure_fail(
+            ("NCCL library version detected from %s/nccl.h (%s) does not match " +
+             "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
+            (header_dir, header_version, nccl_version),
+        )
 
 def _nccl_configure_impl(repository_ctx):
-  """Implementation of the nccl_configure repository rule."""
-  if _TF_NCCL_VERSION not in repository_ctx.os.environ:
-    # Add a dummy build file to make bazel query happy.
-    repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
-    return
-
-  if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
-    # Forward to the pre-configured remote repository.
-    repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, {
-        "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
-    })
-    return
-
-  nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
-  if matches_version("1", nccl_version):
-    # Alias to GitHub target from @nccl_archive.
-    if not matches_version(nccl_version, "1.3"):
-      auto_configure_fail(
-          "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version)
-    repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
-  else:
-    # Create target for locally installed NCCL.
-    nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
-    _check_nccl_version(repository_ctx, nccl_install_path, nccl_version)
-    repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
-        "%{version}": nccl_version,
-        "%{install_path}": nccl_install_path,
-    })
-
+    """Implementation of the nccl_configure repository rule."""
+    if _TF_NCCL_VERSION not in repository_ctx.os.environ:
+        # Add a dummy build file to make bazel query happy.
+        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
+        return
+
+    if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
+        # Forward to the pre-configured remote repository.
+        repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), {
+            "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
+        })
+        return
+
+    nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
+    if nccl_version == "":
+        # Alias to open source build from @nccl_archive.
+        repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
+
+        # TODO(csigg): implement and reuse in cuda_configure.bzl.
+        gpu_architectures = [
+            "sm_" + capability.replace(".", "")
+            for capability in compute_capabilities(repository_ctx)
+        ]
+
+        # Round-about way to make the list unique.
+        gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys()
+        repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), {
+            "%{gpu_architectures}": str(gpu_architectures),
+        })
+
+        repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda")
+
+        # Temporary work-around for setups which symlink ptxas to a newer
+        # version. The versions of nvlink and ptxas need to agree, so we find
+        # nvlink next to the real location of ptxas. This is only temporary and
+        # will be removed again soon.
+        nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname
+        repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink")
+    else:
+        # Create target for locally installed NCCL.
+        nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
+        nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+        _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
+        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), {
+            "%{version}": nccl_version,
+            "%{install_path}": nccl_install_path,
+            "%{hdr_path}": nccl_hdr_path,
+        })
 
 nccl_configure = repository_rule(
-    implementation=_nccl_configure_impl,
-    environ=[
+    implementation = _nccl_configure_impl,
+    environ = [
+        _CUDA_TOOLKIT_PATH,
+        _NCCL_HDR_PATH,
         _NCCL_INSTALL_PATH,
         _TF_NCCL_VERSION,
+        _TF_CUDA_COMPUTE_CAPABILITIES,
+        _TF_NCCL_CONFIG_REPO,
     ],
 )
 """Detects and configures the NCCL configuration.
diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl
index 7ca835dedf..a07f54955f 100644
--- a/third_party/nccl/system.BUILD.tpl
+++ b/third_party/nccl/system.BUILD.tpl
@@ -20,7 +20,7 @@ genrule(
     "libnccl.so.%{version}",
     "nccl.h",
   ],
-  cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" &&
-           cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
+  cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
+           cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
 )
 
diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 31aa3cee51..6602a480af 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -3,6 +3,121 @@ licenses(["notice"])  # 3-Clause BSD
 exports_files(["LICENSE"])
 
 cc_library(
+    name = "ngraph_headers",
+    hdrs = glob(["src/ngraph/**/*.hpp"]),
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "ngraph_cpu_backend",
+    srcs = [
+        "src/ngraph/runtime/cpu/builder/add.cpp",
+        "src/ngraph/runtime/cpu/builder/allreduce.cpp",
+        "src/ngraph/runtime/cpu/builder/argmax.cpp",
+        "src/ngraph/runtime/cpu/builder/argmin.cpp",
+        "src/ngraph/runtime/cpu/builder/avg_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/batch_norm.cpp",
+        "src/ngraph/runtime/cpu/builder/bounded_relu.cpp",
+        "src/ngraph/runtime/cpu/builder/broadcast.cpp",
+        "src/ngraph/runtime/cpu/builder/concat.cpp",
+        "src/ngraph/runtime/cpu/builder/convert.cpp",
+        "src/ngraph/runtime/cpu/builder/convert_layout.cpp",
+        "src/ngraph/runtime/cpu/builder/convolution.cpp",
+        "src/ngraph/runtime/cpu/builder/dot.cpp",
+        "src/ngraph/runtime/cpu/builder/function_call.cpp",
+        "src/ngraph/runtime/cpu/builder/lrn.cpp",
+        "src/ngraph/runtime/cpu/builder/lstm.cpp",
+        "src/ngraph/runtime/cpu/builder/matmul_bias.cpp",
+        "src/ngraph/runtime/cpu/builder/max.cpp",
+        "src/ngraph/runtime/cpu/builder/max_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/min.cpp",
+        "src/ngraph/runtime/cpu/builder/one_hot.cpp",
+        "src/ngraph/runtime/cpu/builder/pad.cpp",
+        "src/ngraph/runtime/cpu/builder/product.cpp",
+        "src/ngraph/runtime/cpu/builder/quantize.cpp",
+        "src/ngraph/runtime/cpu/builder/quantized_avg_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/quantized_max_pool.cpp",
+        "src/ngraph/runtime/cpu/builder/reduce_function.cpp",
+        "src/ngraph/runtime/cpu/builder/reduce_function_window.cpp",
+        "src/ngraph/runtime/cpu/builder/relu.cpp",
+        "src/ngraph/runtime/cpu/builder/replace_slice.cpp",
+        "src/ngraph/runtime/cpu/builder/reshape.cpp",
+        "src/ngraph/runtime/cpu/builder/reverse.cpp",
+        "src/ngraph/runtime/cpu/builder/reverse_sequence.cpp",
+        "src/ngraph/runtime/cpu/builder/rnn.cpp",
+        "src/ngraph/runtime/cpu/builder/select.cpp",
+        "src/ngraph/runtime/cpu/builder/select_and_scatter.cpp",
+        "src/ngraph/runtime/cpu/builder/sigmoid.cpp",
+        "src/ngraph/runtime/cpu/builder/slice.cpp",
+        "src/ngraph/runtime/cpu/builder/softmax.cpp",
+        "src/ngraph/runtime/cpu/builder/sum.cpp",
+        "src/ngraph/runtime/cpu/builder/topk.cpp",
+        "src/ngraph/runtime/cpu/cpu_backend.cpp",
+        "src/ngraph/runtime/cpu/cpu_builder.cpp",
+        "src/ngraph/runtime/cpu/cpu_call_frame.cpp",
+        "src/ngraph/runtime/cpu/cpu_external_function.cpp",
+        "src/ngraph/runtime/cpu/cpu_kernels.cpp",
+        "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view.cpp",
+        "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp",
+        "src/ngraph/runtime/cpu/cpu_tracing.cpp",
+        "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp",
+        "src/ngraph/runtime/cpu/kernel/pad.cpp",
+        "src/ngraph/runtime/cpu/kernel/reduce_max.cpp",
+        "src/ngraph/runtime/cpu/kernel/reduce_sum.cpp",
+        "src/ngraph/runtime/cpu/kernel/reshape.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_emitter.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_invoke.cpp",
+        "src/ngraph/runtime/cpu/mkldnn_utils.cpp",
+        "src/ngraph/runtime/cpu/op/batch_dot.cpp",
+        "src/ngraph/runtime/cpu/op/batch_norm_relu.cpp",
+        "src/ngraph/runtime/cpu/op/bounded_relu.cpp",
+        "src/ngraph/runtime/cpu/op/conv_add.cpp",
+        "src/ngraph/runtime/cpu/op/conv_bias.cpp",
+        "src/ngraph/runtime/cpu/op/conv_relu.cpp",
+        "src/ngraph/runtime/cpu/op/convert_layout.cpp",
+        "src/ngraph/runtime/cpu/op/dequantize.cpp",
+        "src/ngraph/runtime/cpu/op/group_conv.cpp",
+        "src/ngraph/runtime/cpu/op/loop_kernel.cpp",
+        "src/ngraph/runtime/cpu/op/lstm.cpp",
+        "src/ngraph/runtime/cpu/op/matmul_bias.cpp",
+        "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp",
+        "src/ngraph/runtime/cpu/op/quantize.cpp",
+        "src/ngraph/runtime/cpu/op/quantized_avg_pool.cpp",
+        "src/ngraph/runtime/cpu/op/quantized_max_pool.cpp",
+        "src/ngraph/runtime/cpu/op/rnn.cpp",
+        "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_collapse_dims.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_concat_inputs.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_layout.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_mat_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_workspace_insertion.cpp",
+    ],
+    hdrs = glob(["src/ngraph/runtime/cpu/**/*.hpp"]) + glob([]),
+    deps = [
+        ":ngraph_headers",
+        "@eigen_archive//:eigen",
+        "@nlohmann_json_lib",
+        "@tbb",
+        "@mkl_dnn//:mkl_dnn",
+    ],
+    copts = [
+        "-I external/ngraph/src",
+        "-I external/nlohmann_json_lib/include/",
+        '-D SHARED_LIB_EXT=\\".so\\"',
+        '-D NGRAPH_VERSION=\\"0.8.1\\"',
+        "-D NGRAPH_DEX_ONLY",
+    ],
+    visibility = ["//visibility:public"],
+    alwayslink = 1,
+)
+
+cc_library(
     name = "ngraph_core",
     srcs = glob([
         "src/ngraph/*.cpp",
@@ -18,11 +133,10 @@ cc_library(
         "src/ngraph/pass/*.hpp",
         "src/ngraph/runtime/*.cpp",
         "src/ngraph/type/*.cpp",
-        "src/ngraph/runtime/interpreter/*.cpp",
-        "src/ngraph/runtime/interpreter/*.hpp",
     ]),
-    hdrs = glob(["src/ngraph/**/*.hpp"]),
     deps = [
+        ":ngraph_headers",
+        ":ngraph_cpu_backend",
         "@eigen_archive//:eigen",
         "@nlohmann_json_lib",
     ],
@@ -30,7 +144,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.5.0\\"',
+        '-D NGRAPH_VERSION=\\"0.8.1\\"',
     ],
     visibility = ["//visibility:public"],
     alwayslink = 1,
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index 4d96ccf2f2..dbedca0a03 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -8,46 +8,44 @@ load(
 )
 
 cc_library(
-    name = "ngraph_libs_linux",
-    srcs = [
-        "lib/libiomp5.so",
-        "lib/libmklml_intel.so",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
     name = "ngraph_tf",
     srcs = [
-        "src/ngraph_builder.h",
+        "src/ngraph_api.cc",
+        "src/ngraph_api.h",
+        "src/ngraph_assign_clusters.cc",
+        "src/ngraph_assign_clusters.h",
         "src/ngraph_builder.cc",
-        "src/ngraph_cluster.h",
-        "src/ngraph_cluster.cc",
-        "src/ngraph_cluster_manager.h",
+        "src/ngraph_builder.h",
+        "src/ngraph_capture_variables.cc",
+        "src/ngraph_capture_variables.h",
         "src/ngraph_cluster_manager.cc",
-        "src/ngraph_confirm_pass.cc",
-        "src/ngraph_device.cc",
+        "src/ngraph_cluster_manager.h",
+        "src/ngraph_conversions.h",
+        "src/ngraph_deassign_clusters.cc",
+        "src/ngraph_deassign_clusters.h",
+        "src/ngraph_encapsulate_clusters.cc",
+        "src/ngraph_encapsulate_clusters.h",
         "src/ngraph_encapsulate_op.cc",
-        "src/ngraph_encapsulate_pass.cc",
-        "src/ngraph_freshness_tracker.h",
         "src/ngraph_freshness_tracker.cc",
-        "src/ngraph_graph_rewrite_passes.cc",
-        "src/ngraph_liberate_pass.cc",
-        "src/ngraph_op_kernels.cc",
-        "src/ngraph_stub_ops.cc",
-        "src/ngraph_utils.h",
+        "src/ngraph_freshness_tracker.h",
+        "src/ngraph_mark_for_clustering.cc",
+        "src/ngraph_mark_for_clustering.h",
+        "src/ngraph_rewrite_for_tracking.cc",
+        "src/ngraph_rewrite_for_tracking.h",
+        "src/ngraph_rewrite_pass.cc",
+        "src/ngraph_tracked_variable.cc",
         "src/ngraph_utils.cc",
-        "src/ngraph_send_recv_ops.cc",
-        "src/ngraph_variable_ops.cc",
+        "src/ngraph_utils.h",
+        "src/ngraph_version_utils.h",
+        "src/tf_deadness_analysis.cc",
+        "src/tf_deadness_analysis.h",
         "src/tf_graphcycles.cc",
+        "src/tf_graphcycles.h",
         "logging/ngraph_log.h",
         "logging/ngraph_log.cc",
         "logging/tf_graph_writer.h",
         "logging/tf_graph_writer.cc",
     ],
-    hdrs = [
-        "src/tf_graphcycles.h",
-    ],
     deps = [
         "@org_tensorflow//tensorflow/core:protos_all_proto_text",
         "@org_tensorflow//tensorflow/core:framework_headers_lib",
@@ -58,7 +56,6 @@ cc_library(
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
-        "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
     ],
     alwayslink = 1,
     visibility = ["//visibility:public"],
@@ -68,8 +65,19 @@ tf_cc_test(
     name = "ngraph_tf_tests",
     size = "small",
     srcs = [
-        "test/tf_exec.cpp",
+        "test/conversions.cpp",
+        "test/graph_rewrites/assign_clusters.cc",
+        "test/graph_rewrites/deadness_test.cc",
         "test/main.cpp",
+        "test/opexecuter.cpp",
+        "test/opexecuter.h",
+        "test/padding.cpp",
+        "test/test_array_ops.cpp",
+        "test/test_math_ops.cpp",
+        "test/test_nn_ops.cpp",
+        "test/test_utilities.cpp",
+        "test/test_utilities.h",
+        "test/tf_exec.cpp",
     ],
     deps = [
         ":ngraph_tf",
@@ -80,7 +88,6 @@ tf_cc_test(
     ],
     extra_copts = [
         "-fexceptions ",
-        "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1",
         "-I external/ngraph_tf/src",
         "-I external/ngraph_tf/logging",
         "-I external/ngraph/src",
diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD
new file mode 100644
index 0000000000..04e6544ffb
--- /dev/null
+++ b/third_party/ngraph/tbb.BUILD
@@ -0,0 +1,63 @@
+licenses(["notice"])  # 3-Clause BSD
+
+exports_files(["LICENSE"])
+
+# Taken from: https://github.com/rnburn/satyr/blob/master/bazel/tbb.BUILD
+# License for this BUILD file: MIT
+# See: https://github.com/rnburn/satyr/blob/master/LICENSE
+#
+# License for TBB: Apache 2.0
+# See: https://github.com/01org/tbb/blob/tbb_2018/LICENSE
+
+genrule(
+    name = "build_tbb",
+    srcs = glob(["**"]) + [
+        "@local_config_cc//:toolchain",
+    ],
+    cmd = """
+	    set -e
+	    WORK_DIR=$$PWD
+		DEST_DIR=$$PWD/$(@D)
+        export PATH=$$(dirname $(AR)):$$PATH
+		export CXXFLAGS=$(CC_FLAGS)
+		export NM=$(NM)
+		export AR=$(AR)
+		cd $$(dirname $(location :Makefile))
+
+        #TBB's build needs some help to figure out what compiler it's using
+        if $$CXX --version | grep clang &> /dev/null; then 
+           COMPILER_OPT="compiler=clang"
+        else
+			COMPILER_OPT="compiler=gcc"
+
+          #  # Workaround for TBB bug
+          #  # See https://github.com/01org/tbb/issues/59
+          #  CXXFLAGS="$$CXXFLAGS -flifetime-dse=1"
+        fi 
+
+        # uses extra_inc=big_iron.inc to specify that static libraries are
+        # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792
+        make tbb_build_prefix="build" \
+              extra_inc=big_iron.inc \
+              $$COMPILER_OPT; \
+
+        echo cp build/build_{release,debug}/*.a $$DEST_DIR
+        cp build/build_{release,debug}/*.a $$DEST_DIR
+		cd $$WORK_DIR
+	""",
+    outs = [
+        "libtbb.a",
+        "libtbbmalloc.a",
+    ],
+)
+
+cc_library(
+    name = "tbb",
+    hdrs = glob([
+        "include/serial/**",
+        "include/tbb/**/**",
+    ]),
+    srcs = ["libtbb.a"],
+    includes = ["include"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index 3c7e5c8469..53264630a1 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -130,8 +130,8 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
       # If we have only one file to link we do not want to use the dest_dir, as
       # $(@D) will include the full path to the file.
       dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
-      # On Windows, symlink is not supported, so we just copy all the files.
-      cmd = 'cp -f' if _is_windows(repository_ctx) else 'ln -s'
+      # Copy the headers to create a sandboxable setup.
+      cmd = 'cp -f'
       command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
       outs.append('        "' + dest_dir + dest_files[i] + '",')
   genrule = _genrule(src_dir, genrule_name, " && ".join(command),
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index 7d1aa5dce9..6e30618d39 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -119,6 +119,10 @@ def _tf_http_archive(ctx):
             "%prefix%": ".." if _repos_are_siblings() else "external",
         }, False)
 
+    if use_syslib:
+        for internal_src, external_dest in ctx.attr.system_link_files.items():
+            ctx.symlink(Label(internal_src), ctx.path(external_dest))
+
 tf_http_archive = repository_rule(
     implementation = _tf_http_archive,
     attrs = {
@@ -130,6 +134,7 @@ tf_http_archive = repository_rule(
         "patch_file": attr.label(),
         "build_file": attr.label(),
         "system_build_file": attr.label(),
+        "system_link_files": attr.string_dict(),
     },
     environ = [
         "TF_SYSTEM_LIBS",
@@ -180,7 +185,16 @@ def _third_party_http_archive(ctx):
             _apply_patch(ctx, ctx.attr.patch_file)
         ctx.symlink(Label(ctx.attr.build_file), buildfile_path)
 
+    link_dict = dict()
+    if use_syslib:
+        link_dict.update(ctx.attr.system_link_files)
+
     for internal_src, external_dest in ctx.attr.link_files.items():
+        # if syslib and link exists in both, use the system one
+        if external_dest not in link_dict.values():
+            link_dict[internal_src] = external_dest
+
+    for internal_src, external_dest in link_dict.items():
         ctx.symlink(Label(internal_src), ctx.path(external_dest))
 
 # Downloads and creates Bazel repos for dependencies.
@@ -201,6 +215,7 @@ third_party_http_archive = repository_rule(
         "system_build_file": attr.string(mandatory = False),
         "patch_file": attr.label(),
         "link_files": attr.string_dict(),
+        "system_link_files": attr.string_dict(),
     },
     environ = [
         "TF_SYSTEM_LIBS",
diff --git a/third_party/systemlibs/absl_py.BUILD b/third_party/systemlibs/absl_py.BUILD
new file mode 100644
index 0000000000..fe756e1be2
--- /dev/null
+++ b/third_party/systemlibs/absl_py.BUILD
@@ -0,0 +1 @@
+licenses(["notice"])  # Apache 2.0
diff --git a/third_party/systemlibs/absl_py.absl.flags.BUILD b/third_party/systemlibs/absl_py.absl.flags.BUILD
new file mode 100644
index 0000000000..95ec92b887
--- /dev/null
+++ b/third_party/systemlibs/absl_py.absl.flags.BUILD
@@ -0,0 +1,11 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "LICENSE",
+)
+
+py_library(
+    name = "flags",
+)
diff --git a/third_party/systemlibs/absl_py.absl.testing.BUILD b/third_party/systemlibs/absl_py.absl.testing.BUILD
new file mode 100644
index 0000000000..c1b794c1e9
--- /dev/null
+++ b/third_party/systemlibs/absl_py.absl.testing.BUILD
@@ -0,0 +1,7 @@
+licenses(["notice"])  # Apache 2.0
+
+py_library(
+    name = "parameterized",
+    testonly = 1,
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/boringssl.BUILD b/third_party/systemlibs/boringssl.BUILD
new file mode 100644
index 0000000000..bc4c533403
--- /dev/null
+++ b/third_party/systemlibs/boringssl.BUILD
@@ -0,0 +1,21 @@
+licenses(["notice"])
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "crypto",
+    linkopts = ["-lcrypto"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "ssl",
+    linkopts = ["-lssl"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":crypto",
+    ],
+)
diff --git a/third_party/systemlibs/double_conversion.BUILD b/third_party/systemlibs/double_conversion.BUILD
new file mode 100644
index 0000000000..568460181a
--- /dev/null
+++ b/third_party/systemlibs/double_conversion.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "double-conversion",
+    linkopts = ["-ldouble-conversion"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/gast.BUILD b/third_party/systemlibs/gast.BUILD
new file mode 100644
index 0000000000..c6e1d0c4e0
--- /dev/null
+++ b/third_party/systemlibs/gast.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])  # BSD 3-clause
+
+filegroup(
+    name = "PKG-INFO",
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "gast",
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/google_cloud_cpp.BUILD b/third_party/systemlibs/google_cloud_cpp.BUILD
new file mode 100644
index 0000000000..cbe6e10ba5
--- /dev/null
+++ b/third_party/systemlibs/google_cloud_cpp.BUILD
@@ -0,0 +1,6 @@
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD b/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
new file mode 100644
index 0000000000..b59d565390
--- /dev/null
+++ b/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
@@ -0,0 +1,7 @@
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "bigtable_client",
+    linkopts = ["-lbigtable_client"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/googleapis.BUILD b/third_party/systemlibs/googleapis.BUILD
new file mode 100644
index 0000000000..7687745df9
--- /dev/null
+++ b/third_party/systemlibs/googleapis.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "bigtable_protos",
+    linkopts = ["-lbigtable_protos"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/systemlibs/jemalloc.BUILD b/third_party/systemlibs/jemalloc.BUILD
deleted file mode 100644
index 6a48d582ba..0000000000
--- a/third_party/systemlibs/jemalloc.BUILD
+++ /dev/null
@@ -1,30 +0,0 @@
-licenses(["notice"])  # BSD
-
-filegroup(
-    name = "COPYING",
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_headers",
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    linkopts = ["-ljemalloc"],
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
diff --git a/third_party/systemlibs/jsoncpp.BUILD b/third_party/systemlibs/jsoncpp.BUILD
index cf91917cfb..526fd0c418 100644
--- a/third_party/systemlibs/jsoncpp.BUILD
+++ b/third_party/systemlibs/jsoncpp.BUILD
@@ -23,7 +23,7 @@ genrule(
     cmd = """
       for i in $(OUTS); do
         i=$${i##*/}
-        ln -vsf /usr/include/jsoncpp/json/$$i $(@D)/include/json/$$i
+        ln -sf $(INCLUDEDIR)/jsoncpp/json/$$i $(@D)/include/json/$$i
       done
     """,
 )
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 8b09c9ac1f..b03d3380d7 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -10,14 +10,19 @@
 _TF_SYSTEM_LIBS = "TF_SYSTEM_LIBS"
 
 VALID_LIBS = [
+    "absl_py",
     "astor_archive",
+    "boringssl",
+    "com_github_googleapis_googleapis",
+    "com_github_googlecloudplatform_google_cloud_cpp",
     "com_googlesource_code_re2",
     "curl",
     "cython",
+    "double_conversion",
     "flatbuffers",
+    "gast_archive",
     "gif_archive",
     "grpc",
-    "jemalloc",
     "jpeg",
     "jsoncpp_git",
     "lmdb",
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index 4303751452..bcbc4dda11 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -26,12 +26,10 @@ platform(
     constraint_values = [
         "@bazel_tools//platforms:x86_64",
         "@bazel_tools//platforms:linux",
-        "@bazel_tools//tools/cpp:clang",
-        "@bazel_toolchains//constraints:xenial",
     ],
     remote_execution_properties = """
         properties: {
             name: "container-image"
-            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:ae58329b961e7c17d89725bf8fd72dfbd5850f4f3313de58e0cafbf5b0343735"
+            value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:e5099ff15650986e268a43ee99e2d2b7ffe2459b8b6935385078d1d3b2ed4d02"
         }""",
 )
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
index 2d3e41127d..05abcb56d8 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
@@ -1253,7 +1253,7 @@ genrule(
         "cuda/lib/libcupti.so.9.0",
     ],
     cmd = """
-if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.480" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.2.1" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0"
+if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.480" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.1.4" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0"
    """,
 )
 
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
index a56b4513fb..6442e7628a 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
@@ -2,6 +2,20 @@ licenses(["restricted"])
 
 package(default_visibility = ["//visibility:public"])
 
+toolchain(
+    name = "toolchain-linux-x86_64",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = ":cc-compiler-local",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
 cc_toolchain_suite(
     name = "toolchain",
     toolchains = {