From 7c2341501a583ca625c976f118090e495cdcbe07 Mon Sep 17 00:00:00 2001 From: Jason Furmanek Date: Wed, 26 Sep 2018 04:44:12 +0000 Subject: Find NCCL2 debians in Tensorflow configure --- configure.py | 136 ++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 44 deletions(-) (limited to 'configure.py') diff --git a/configure.py b/configure.py index f0b9fada5e..9fd2dc2630 100644 --- a/configure.py +++ b/configure.py @@ -54,6 +54,12 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') +NCCL_LIB_PATHS = [ + "lib64/", + "lib/powerpc64le-linux-gnu/", + "lib/x86_64-linux-gnu/", + "" +] class UserInputError(Exception): pass @@ -1085,7 +1091,7 @@ def set_tf_tensorrt_install_path(environ_cp): def set_tf_nccl_install_path(environ_cp): - """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. + """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION. Args: environ_cp: copy of the os.environ. @@ -1111,46 +1117,98 @@ def set_tf_nccl_install_path(environ_cp): if tf_nccl_version == '1': break # No need to get install path, NCCL 1 is a GitHub repo. - # TODO(csigg): Look with ldconfig first if we can find the library in paths + # Look with ldconfig first if we can find the library in paths # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding # include directory. This is where the NCCL .deb packages install them. - # Then ask the user if we should use that. Instead of a single - # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to - # nccl_configure.bzl - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = (r'Please specify the location where NCCL %s library is ' + + # First check to see if NCCL is in the ldconfig. + # If its found, use that location. + if is_linux(): + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) + nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)', + nccl2_path_from_ldconfig) + if nccl2_path_from_ldconfig: + nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)): + nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig) + print('NCCL libraries found in ' + nccl2_path_from_ldconfig) + + # Check if this is the main system lib location + if re.search('.*linux-gnu', nccl_install_path): + trunc_nccl_install_path = "/usr" + print("This looks like a system path.") + else: + trunc_nccl_install_path = nccl_install_path + "/.." + + # Look for header + nccl_hdr_path = trunc_nccl_install_path + "/include" + print("Assuming NCCL header path is " + nccl_hdr_path) + if os.path.exists(nccl_hdr_path + "/nccl.h"): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path + write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path) + break + else: + print('The header for NCCL2 cannot be found. Please install the libnccl-dev package.') + else: + print('NCCL2 is listed by ldconfig but the library is not found. ' + 'Your ldconfig is out of date. Please run sudo ldconfig.') + else: + # NCCL is not found in ldconfig. Ask the user for the location. + default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = (r'Please specify the location where NCCL %s library is ' 'installed. Refer to README.md for more details. [Default ' 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( + nccl_install_path = get_from_env_or_user_or_default( environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) - - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') - if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - break - - # Reset and Retry - print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' - 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) + + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version + nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename) + if not os.path.exists(nccl_lpath): + for relative_path in NCCL_LIB_PATHS: + path = '%s/%s%s' % (nccl_install_path, relative_path, nccl_lib_filename) + if os.path.exists(path): + print("NCCL found at " + path) + nccl_lib_path = path + break + else: + nccl_lib_path = nccl_lpath + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join(os.path.dirname(nccl_lib_path), '../include/nccl.h') + print("Assuming NCCL header path is "+nccl_hdr_path) + if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path) + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', os.path.dirname(nccl_lib_path)) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path) + write_action_env_to_bazelrc('NCCL_HDR_PATH', os.path.dirname(nccl_hdr_path)) + break + + # Reset and Retry + print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' + 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, nccl_hdr_path)) - environ_cp['TF_NCCL_VERSION'] = '' + environ_cp['TF_NCCL_VERSION'] = '' else: raise UserInputError('Invalid TF_NCCL setting was provided %d ' 'times in a row. Assuming to be a scripting mistake.' % @@ -1401,20 +1459,10 @@ def set_grpc_build_flags(): def set_system_libs_flag(environ_cp): syslibs = environ_cp.get('TF_SYSTEM_LIBS', '') + syslibs = ','.join(sorted(syslibs.split(','))) if syslibs and syslibs != '': - if ',' in syslibs: - syslibs = ','.join(sorted(syslibs.split(','))) - else: - syslibs = ','.join(sorted(syslibs.split())) write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs) - if 'PREFIX' in environ_cp: - write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX']) - if 'LIBDIR' in environ_cp: - write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR']) - if 'INCLUDEDIR' in environ_cp: - write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) - def set_windows_build_flags(environ_cp): """Set Windows specific build options.""" -- cgit v1.2.3