diff options
Diffstat (limited to 'third_party/nccl/nccl_configure.bzl')
-rw-r--r-- | third_party/nccl/nccl_configure.bzl | 214 |
1 files changed, 118 insertions, 96 deletions
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index d78fe8f3aa..7f00df0962 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -11,12 +11,16 @@ load( "//third_party/gpus:cuda_configure.bzl", "auto_configure_fail", + "compute_capabilities", + "cuda_toolkit_path", "find_cuda_define", "matches_version", ) -_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH" _NCCL_HDR_PATH = "NCCL_HDR_PATH" +_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO" @@ -37,6 +41,12 @@ cc_library( """ _NCCL_ARCHIVE_BUILD_CONTENT = """ +exports_files([ + "cuda/bin/crt/link.stub", + "cuda/bin/fatbinary", + "nvlink", +]) + filegroup( name = "LICENSE", data = ["@nccl_archive//:LICENSE.txt"], @@ -50,113 +60,125 @@ alias( ) """ -# Local build results in dynamic link and the license should not be included. -_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl") -_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl") +def _label(file): + return Label("//third_party/nccl:{}".format(file)) def _find_nccl_header(repository_ctx, nccl_install_path): - """Finds the NCCL header on the system. - - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library install directory. + """Finds the NCCL header on the system. - Returns: - The path to the NCCL header. - """ - header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) - if not header_path.exists: - auto_configure_fail("Cannot find %s" % str(header_path)) - return header_path + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + Returns: + The path to the NCCL header. + """ + header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) + if not header_path.exists: + auto_configure_fail("Cannot find %s" % str(header_path)) + return header_path def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version): - """Checks whether the header file matches the specified version of NCCL. - - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library install directory. - nccl_version: The expected NCCL version. - - Returns: - A string containing the library version of NCCL. - """ - header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) - if not header_path.exists: - header_path = _find_nccl_header(repository_ctx, nccl_install_path) - header_dir = str(header_path.realpath.dirname) - major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_MAJOR) - minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_MINOR) - patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_PATCH) - header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - if not matches_version(nccl_version, header_version): - auto_configure_fail( - ("NCCL library version detected from %s/nccl.h (%s) does not match " + - "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % - (header_dir, header_version, nccl_version)) - - -def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version): - """Finds the given NCCL library on the system. - - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library installation directory. - nccl_version: The version of NCCL library files as returned - by _nccl_version. - - Returns: - The path to the NCCL library. - """ - lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path, - nccl_version)) - if not lib_path.exists: - auto_configure_fail("Cannot find NCCL library %s" % str(lib_path)) - return lib_path - + """Checks whether the header file matches the specified version of NCCL. + + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + nccl_hdr_path: The NCCL header path. + nccl_version: The expected NCCL version. + + Returns: + A string containing the library version of NCCL. + """ + header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) + if not header_path.exists: + header_path = _find_nccl_header(repository_ctx, nccl_install_path) + header_dir = str(header_path.realpath.dirname) + major_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_MAJOR, + ) + minor_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_MINOR, + ) + patch_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_PATCH, + ) + header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) + if not matches_version(nccl_version, header_version): + auto_configure_fail( + ("NCCL library version detected from %s/nccl.h (%s) does not match " + + "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % + (header_dir, header_version, nccl_version), + ) def _nccl_configure_impl(repository_ctx): - """Implementation of the nccl_configure repository rule.""" - if _TF_NCCL_VERSION not in repository_ctx.os.environ: - # Add a dummy build file to make bazel query happy. - repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) - return - - if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: - # Forward to the pre-configured remote repository. - repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, { - "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], - }) - return - - nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() - if matches_version("1", nccl_version): - # Alias to GitHub target from @nccl_archive. - if not matches_version(nccl_version, "1.3"): - auto_configure_fail( - "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version) - repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) - else: - # Create target for locally installed NCCL. - nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() - nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() - _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) - repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, { - "%{version}": nccl_version, - "%{install_path}": nccl_install_path, - "%{hdr_path}": nccl_hdr_path, - }) - + """Implementation of the nccl_configure repository rule.""" + if _TF_NCCL_VERSION not in repository_ctx.os.environ: + # Add a dummy build file to make bazel query happy. + repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) + return + + if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: + # Forward to the pre-configured remote repository. + repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), { + "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], + }) + return + + nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() + if nccl_version == "": + # Alias to open source build from @nccl_archive. + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + + # TODO(csigg): implement and reuse in cuda_configure.bzl. + gpu_architectures = [ + "sm_" + capability.replace(".", "") + for capability in compute_capabilities(repository_ctx) + ] + + # Round-about way to make the list unique. + gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys() + repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), { + "%{gpu_architectures}": str(gpu_architectures), + }) + + repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda") + + # Temporary work-around for setups which symlink ptxas to a newer + # version. The versions of nvlink and ptxas need to agree, so we find + # nvlink next to the real location of ptxas. This is only temporary and + # will be removed again soon. + nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname + repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink") + else: + # Create target for locally installed NCCL. + nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() + nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() + _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) + repository_ctx.template("BUILD", _label("system.BUILD.tpl"), { + "%{version}": nccl_version, + "%{install_path}": nccl_install_path, + "%{hdr_path}": nccl_hdr_path, + }) nccl_configure = repository_rule( - implementation=_nccl_configure_impl, - environ=[ - _NCCL_INSTALL_PATH, + implementation = _nccl_configure_impl, + environ = [ + _CUDA_TOOLKIT_PATH, _NCCL_HDR_PATH, + _NCCL_INSTALL_PATH, _TF_NCCL_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + _TF_NCCL_CONFIG_REPO, ], ) """Detects and configures the NCCL configuration. |