aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Jason Furmanek <furmanek@us.ibm.com>2018-09-26 04:44:12 +0000
committerGravatar Jason Furmanek <furmanek@us.ibm.com>2018-09-26 04:44:12 +0000
commit7c2341501a583ca625c976f118090e495cdcbe07 (patch)
tree3c6cba5366c4f0f119df312d7e4d26f3f4119b4e
parent6666516f390f125ed70ddbd4e6f89b83d953c408 (diff)
Find NCCL2 debians in Tensorflow configure
-rw-r--r--configure.py136
-rw-r--r--third_party/nccl/nccl_configure.bzl14
-rw-r--r--third_party/nccl/system.BUILD.tpl4
3 files changed, 105 insertions, 49 deletions
diff --git a/configure.py b/configure.py
index f0b9fada5e..9fd2dc2630 100644
--- a/configure.py
+++ b/configure.py
@@ -54,6 +54,12 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
+NCCL_LIB_PATHS = [
+ "lib64/",
+ "lib/powerpc64le-linux-gnu/",
+ "lib/x86_64-linux-gnu/",
+ ""
+]
class UserInputError(Exception):
pass
@@ -1085,7 +1091,7 @@ def set_tf_tensorrt_install_path(environ_cp):
def set_tf_nccl_install_path(environ_cp):
- """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.
+ """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION.
Args:
environ_cp: copy of the os.environ.
@@ -1111,46 +1117,98 @@ def set_tf_nccl_install_path(environ_cp):
if tf_nccl_version == '1':
break # No need to get install path, NCCL 1 is a GitHub repo.
- # TODO(csigg): Look with ldconfig first if we can find the library in paths
+ # Look with ldconfig first if we can find the library in paths
# like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
# include directory. This is where the NCCL .deb packages install them.
- # Then ask the user if we should use that. Instead of a single
- # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to
- # nccl_configure.bzl
- default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
- ask_nccl_path = (r'Please specify the location where NCCL %s library is '
+
+ # First check to see if NCCL is in the ldconfig.
+ # If it's found, use that location.
+ if is_linux():
+ ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
+ nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p'])
+ nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)',
+ nccl2_path_from_ldconfig)
+ if nccl2_path_from_ldconfig:
+ nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1)
+ if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)):
+ nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig)
+ print('NCCL libraries found in ' + nccl2_path_from_ldconfig)
+
+ # Check if this is the main system lib location
+ if re.search('.*linux-gnu', nccl_install_path):
+ trunc_nccl_install_path = "/usr"
+ print("This looks like a system path.")
+ else:
+ trunc_nccl_install_path = nccl_install_path + "/.."
+
+ # Look for header
+ nccl_hdr_path = trunc_nccl_install_path + "/include"
+ print("Assuming NCCL header path is " + nccl_hdr_path)
+ if os.path.exists(nccl_hdr_path + "/nccl.h"):
+ # Set NCCL_INSTALL_PATH
+ environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
+ write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
+
+ # Set NCCL_HDR_PATH
+ environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path
+ write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path)
+ break
+ else:
+ print('The header for NCCL2 cannot be found. Please install the libnccl-dev package.')
+ else:
+ print('NCCL2 is listed by ldconfig but the library is not found. '
+ 'Your ldconfig is out of date. Please run sudo ldconfig.')
+ else:
+ # NCCL is not found in ldconfig. Ask the user for the location.
+ default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
+ ask_nccl_path = (r'Please specify the location where NCCL %s library is '
'installed. Refer to README.md for more details. [Default '
'is %s]:') % (tf_nccl_version, default_nccl_path)
- nccl_install_path = get_from_env_or_user_or_default(
+ nccl_install_path = get_from_env_or_user_or_default(
environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
- # Result returned from "read" will be used unexpanded. That make "~"
- # unusable. Going through one more level of expansion to handle that.
- nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
- if is_windows() or is_cygwin():
- nccl_install_path = cygpath(nccl_install_path)
-
- if is_windows():
- nccl_lib_path = 'lib/x64/nccl.lib'
- elif is_linux():
- nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version
- elif is_macos():
- nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
-
- nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
- nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h')
- if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
- # Set NCCL_INSTALL_PATH
- environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
- write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
- break
-
- # Reset and Retry
- print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
- 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
+ # Result returned from "read" will be used unexpanded. That makes "~"
+ # unusable. Going through one more level of expansion to handle that.
+ nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
+ if is_windows() or is_cygwin():
+ nccl_install_path = cygpath(nccl_install_path)
+
+ if is_windows():
+ nccl_lib_path = 'lib/x64/nccl.lib'
+ elif is_linux():
+ nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version
+ nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename)
+ if not os.path.exists(nccl_lpath):
+ for relative_path in NCCL_LIB_PATHS:
+ path = '%s/%s%s' % (nccl_install_path, relative_path, nccl_lib_filename)
+ if os.path.exists(path):
+ print("NCCL found at " + path)
+ nccl_lib_path = path
+ break
+ else:
+ nccl_lib_path = nccl_lpath
+ elif is_macos():
+ nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
+
+ nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
+ nccl_hdr_path = os.path.join(os.path.dirname(nccl_lib_path), '../include/nccl.h')
+ print("Assuming NCCL header path is "+nccl_hdr_path)
+ if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
+ # Set NCCL_INSTALL_PATH
+ environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path)
+ write_action_env_to_bazelrc('NCCL_INSTALL_PATH', os.path.dirname(nccl_lib_path))
+
+ # Set NCCL_HDR_PATH
+ environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path)
+ write_action_env_to_bazelrc('NCCL_HDR_PATH', os.path.dirname(nccl_hdr_path))
+ break
+
+ # Reset and Retry
+ print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
+ 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
nccl_hdr_path))
- environ_cp['TF_NCCL_VERSION'] = ''
+ environ_cp['TF_NCCL_VERSION'] = ''
else:
raise UserInputError('Invalid TF_NCCL setting was provided %d '
'times in a row. Assuming to be a scripting mistake.' %
@@ -1401,20 +1459,10 @@ def set_grpc_build_flags():
def set_system_libs_flag(environ_cp):
syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
+ syslibs = ','.join(sorted(syslibs.split(',')))
if syslibs and syslibs != '':
- if ',' in syslibs:
- syslibs = ','.join(sorted(syslibs.split(',')))
- else:
- syslibs = ','.join(sorted(syslibs.split()))
write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
- if 'PREFIX' in environ_cp:
- write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
- if 'LIBDIR' in environ_cp:
- write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
- if 'INCLUDEDIR' in environ_cp:
- write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
-
def set_windows_build_flags(environ_cp):
"""Set Windows specific build options."""
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index ce9447096e..0713b36724 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -5,6 +5,7 @@
* `TF_NCCL_VERSION`: The NCCL version.
* `NCCL_INSTALL_PATH`: The installation path of the NCCL library.
+ * `NCCL_HDR_PATH`: The installation path of the NCCL header files.
"""
load(
@@ -15,6 +16,7 @@ load(
)
_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_NCCL_HDR_PATH = "NCCL_HDR_PATH"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
@@ -68,7 +70,7 @@ def _find_nccl_header(repository_ctx, nccl_install_path):
return header_path
-def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
+def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
"""Checks whether the header file matches the specified version of NCCL.
Args:
@@ -79,7 +81,9 @@ def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
Returns:
A string containing the library version of NCCL.
"""
- header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+ header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+ if not header_path.exists:
+ header_path = _find_nccl_header(repository_ctx, nccl_install_path)
header_dir = str(header_path.realpath.dirname)
major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
_DEFINE_NCCL_MAJOR)
@@ -109,6 +113,7 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
"""
lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
nccl_version))
+
if not lib_path.exists:
auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
return lib_path
@@ -138,10 +143,12 @@ def _nccl_configure_impl(repository_ctx):
else:
# Create target for locally installed NCCL.
nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
- _check_nccl_version(repository_ctx, nccl_install_path, nccl_version)
+ nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+ _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
"%{version}": nccl_version,
"%{install_path}": nccl_install_path,
+ "%{hdr_path}": nccl_hdr_path,
})
@@ -149,6 +156,7 @@ nccl_configure = repository_rule(
implementation=_nccl_configure_impl,
environ=[
_NCCL_INSTALL_PATH,
+ _NCCL_HDR_PATH,
_TF_NCCL_VERSION,
],
)
diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl
index 7ca835dedf..a07f54955f 100644
--- a/third_party/nccl/system.BUILD.tpl
+++ b/third_party/nccl/system.BUILD.tpl
@@ -20,7 +20,7 @@ genrule(
"libnccl.so.%{version}",
"nccl.h",
],
- cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" &&
- cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
+ cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
+ cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
)