about summary refs log tree commit diff homepage
path: root/third_party/nccl/nccl_configure.bzl
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/nccl/nccl_configure.bzl')
-rw-r--r-- third_party/nccl/nccl_configure.bzl 214
1 files changed, 118 insertions, 96 deletions
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index d78fe8f3aa..7f00df0962 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -11,12 +11,16 @@
load(
"//third_party/gpus:cuda_configure.bzl",
"auto_configure_fail",
+ "compute_capabilities",
+ "cuda_toolkit_path",
"find_cuda_define",
"matches_version",
)
-_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_NCCL_HDR_PATH = "NCCL_HDR_PATH"
+_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
@@ -37,6 +41,12 @@ cc_library(
"""
_NCCL_ARCHIVE_BUILD_CONTENT = """
+exports_files([
+ "cuda/bin/crt/link.stub",
+ "cuda/bin/fatbinary",
+ "nvlink",
+])
+
filegroup(
name = "LICENSE",
data = ["@nccl_archive//:LICENSE.txt"],
@@ -50,113 +60,125 @@ alias(
)
"""
-# Local build results in dynamic link and the license should not be included.
-_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl")
-_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl")
+def _label(file):
+ return Label("//third_party/nccl:{}".format(file))
def _find_nccl_header(repository_ctx, nccl_install_path):
- """Finds the NCCL header on the system.
-
- Args:
- repository_ctx: The repository context.
- nccl_install_path: The NCCL library install directory.
+ """Finds the NCCL header on the system.
- Returns:
- The path to the NCCL header.
- """
- header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
- if not header_path.exists:
- auto_configure_fail("Cannot find %s" % str(header_path))
- return header_path
+ Args:
+ repository_ctx: The repository context.
+ nccl_install_path: The NCCL library install directory.
+ Returns:
+ The path to the NCCL header.
+ """
+ header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
+ if not header_path.exists:
+ auto_configure_fail("Cannot find %s" % str(header_path))
+ return header_path
def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
- """Checks whether the header file matches the specified version of NCCL.
-
- Args:
- repository_ctx: The repository context.
- nccl_install_path: The NCCL library install directory.
- nccl_version: The expected NCCL version.
-
- Returns:
- A string containing the library version of NCCL.
- """
- header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
- if not header_path.exists:
- header_path = _find_nccl_header(repository_ctx, nccl_install_path)
- header_dir = str(header_path.realpath.dirname)
- major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
- _DEFINE_NCCL_MAJOR)
- minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
- _DEFINE_NCCL_MINOR)
- patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
- _DEFINE_NCCL_PATCH)
- header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
- if not matches_version(nccl_version, header_version):
- auto_configure_fail(
- ("NCCL library version detected from %s/nccl.h (%s) does not match " +
- "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
- (header_dir, header_version, nccl_version))
-
-
-def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
- """Finds the given NCCL library on the system.
-
- Args:
- repository_ctx: The repository context.
- nccl_install_path: The NCCL library installation directory.
- nccl_version: The version of NCCL library files as returned
- by _nccl_version.
-
- Returns:
- The path to the NCCL library.
- """
- lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
- nccl_version))
- if not lib_path.exists:
- auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
- return lib_path
-
+ """Checks whether the header file matches the specified version of NCCL.
+
+ Args:
+ repository_ctx: The repository context.
+ nccl_install_path: The NCCL library install directory.
+ nccl_hdr_path: The directory searched for nccl.h first; if the header is not there, nccl_install_path/include/nccl.h is used.
+ nccl_version: The expected NCCL version.
+
+ Returns:
+ Nothing. Calls auto_configure_fail if the version found in nccl.h does not match nccl_version.
+ """
+ header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+ if not header_path.exists:
+ header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+ header_dir = str(header_path.realpath.dirname)
+ major_version = find_cuda_define(
+ repository_ctx,
+ header_dir,
+ "nccl.h",
+ _DEFINE_NCCL_MAJOR,
+ )
+ minor_version = find_cuda_define(
+ repository_ctx,
+ header_dir,
+ "nccl.h",
+ _DEFINE_NCCL_MINOR,
+ )
+ patch_version = find_cuda_define(
+ repository_ctx,
+ header_dir,
+ "nccl.h",
+ _DEFINE_NCCL_PATCH,
+ )
+ header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+ if not matches_version(nccl_version, header_version):
+ auto_configure_fail(
+ ("NCCL library version detected from %s/nccl.h (%s) does not match " +
+ "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
+ (header_dir, header_version, nccl_version),
+ )
def _nccl_configure_impl(repository_ctx):
- """Implementation of the nccl_configure repository rule."""
- if _TF_NCCL_VERSION not in repository_ctx.os.environ:
- # Add a dummy build file to make bazel query happy.
- repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
- return
-
- if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
- # Forward to the pre-configured remote repository.
- repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, {
- "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
- })
- return
-
- nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
- if matches_version("1", nccl_version):
- # Alias to GitHub target from @nccl_archive.
- if not matches_version(nccl_version, "1.3"):
- auto_configure_fail(
- "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version)
- repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
- else:
- # Create target for locally installed NCCL.
- nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
- nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
- _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
- repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
- "%{version}": nccl_version,
- "%{install_path}": nccl_install_path,
- "%{hdr_path}": nccl_hdr_path,
- })
-
+ """Implementation of the nccl_configure repository rule."""
+ if _TF_NCCL_VERSION not in repository_ctx.os.environ:
+ # Add a dummy build file to make bazel query happy.
+ repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
+ return
+
+ if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
+ # Forward to the pre-configured remote repository.
+ repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), {
+ "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
+ })
+ return
+
+ nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
+ if nccl_version == "":
+ # Alias to open source build from @nccl_archive.
+ repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
+
+ # TODO(csigg): implement and reuse in cuda_configure.bzl.
+ gpu_architectures = [
+ "sm_" + capability.replace(".", "")
+ for capability in compute_capabilities(repository_ctx)
+ ]
+
+ # Round-about way to make the list unique: use the entries as dict keys.
+ gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys()
+ repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), {
+ "%{gpu_architectures}": str(gpu_architectures),
+ })
+
+ repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda")
+
+ # Temporary work-around for setups which symlink ptxas to a newer
+ # version. The versions of nvlink and ptxas need to agree, so we find
+ # nvlink next to the real location of ptxas. This is only temporary and
+ # will be removed again soon.
+ nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname
+ repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink")
+ else:
+ # Create target for locally installed NCCL.
+ nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
+ nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+ _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
+ repository_ctx.template("BUILD", _label("system.BUILD.tpl"), {
+ "%{version}": nccl_version,
+ "%{install_path}": nccl_install_path,
+ "%{hdr_path}": nccl_hdr_path,
+ })
nccl_configure = repository_rule(
- implementation=_nccl_configure_impl,
- environ=[
- _NCCL_INSTALL_PATH,
+ implementation = _nccl_configure_impl,
+ environ = [
+ _CUDA_TOOLKIT_PATH,
_NCCL_HDR_PATH,
+ _NCCL_INSTALL_PATH,
_TF_NCCL_VERSION,
+ _TF_CUDA_COMPUTE_CAPABILITIES,
+ _TF_NCCL_CONFIG_REPO,
],
)
"""Detects and configures the NCCL configuration.