diff options
author | gracehoney <31743510+aaroey@users.noreply.github.com> | 2018-01-26 09:49:23 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-26 09:49:23 -0800 |
commit | 7611eef0208fa1413880a704e622e57bbcfad0d6 (patch) | |
tree | 4cfbf626431c11f1f1e5219895800de7114edc03 /third_party | |
parent | 9384314e0cbf6f315d870200fc5abe421deefcab (diff) | |
parent | 78021a9a70923f1fdaa65b41271ad0ea70cd7e67 (diff) |
Merge branch 'master' into tensorrt
Diffstat (limited to 'third_party')
-rw-r--r-- | third_party/aws.BUILD | 2 | ||||
-rw-r--r-- | third_party/eigen3/BUILD | 2 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 6 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h | 6 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h | 6 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h | 6 | ||||
-rw-r--r-- | third_party/fft2d/fft.h | 6 | ||||
-rw-r--r-- | third_party/gpus/cuda_configure.bzl | 104 | ||||
-rw-r--r-- | third_party/jpeg/jpeg.BUILD | 53 | ||||
-rw-r--r-- | third_party/swig.BUILD | 2 | ||||
-rw-r--r-- | third_party/tensorrt/BUILD.tpl | 75 | ||||
-rw-r--r-- | third_party/tensorrt/build_defs.bzl.tpl | 23 | ||||
-rw-r--r-- | third_party/tensorrt/tensorrt_configure.bzl | 224 | ||||
-rw-r--r-- | third_party/toolchains/clang6/BUILD | 1 | ||||
-rw-r--r-- | third_party/toolchains/clang6/CROSSTOOL.tpl | 587 | ||||
-rw-r--r-- | third_party/toolchains/clang6/README.md | 101 | ||||
-rw-r--r-- | third_party/toolchains/clang6/clang.BUILD | 162 | ||||
-rw-r--r-- | third_party/toolchains/clang6/repo.bzl | 30 |
18 files changed, 1291 insertions, 105 deletions
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index bf5310aa16..2dc921933c 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -75,7 +75,7 @@ cc_library( "aws-cpp-sdk-s3/include/", ], deps = [ - "@curl//:curl", + "@curl", ], ) diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index f5f3418527..f661093bc9 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -36,7 +36,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@eigen_archive//:eigen", - "@local_config_sycl//sycl:sycl", + "@local_config_sycl//sycl", ], ) diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h index c210b1712c..cb1636256d 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h @@ -1,5 +1,5 @@ -#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ -#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ +#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ +#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ #ifdef _MSC_VER @@ -502,4 +502,4 @@ struct functor_traits<scalar_product_op<QInt32, double>> { } // end namespace internal } // end namespace Eigen -#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ +#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h index 7a222fddc1..8f9906dbf9 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h @@ -1,5 +1,5 @@ -#ifndef 
THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ -#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ #include "PacketMathAVX2.h" @@ -542,4 +542,4 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet64q8i>(const Packet64q8i& a) { } // end namespace internal } // end namespace Eigen -#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h index 045384d7fc..7b4ecc752f 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h @@ -1,5 +1,5 @@ -#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ -#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ namespace Eigen { namespace internal { @@ -63,4 +63,4 @@ pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b, } // end namespace internal } // end namespace Eigen -#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h index cd7120ec00..26735743d4 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h 
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h @@ -1,5 +1,5 @@ -#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ -#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ namespace Eigen { namespace internal { @@ -177,4 +177,4 @@ pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a, } // end namespace internal } // end namespace Eigen -#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ diff --git a/third_party/fft2d/fft.h b/third_party/fft2d/fft.h index 252cc01fec..31b4935089 100644 --- a/third_party/fft2d/fft.h +++ b/third_party/fft2d/fft.h @@ -15,8 +15,8 @@ limitations under the License. // Declarations for 1D FFT routines in third_party/fft2d/fft. -#ifndef THIRD_PARTY_FFT2D_FFT_H__ -#define THIRD_PARTY_FFT2D_FFT_H__ +#ifndef FFT2D_FFT_H__ +#define FFT2D_FFT_H__ #ifdef __cplusplus extern "C" { @@ -33,4 +33,4 @@ extern void dfst(int, double *, double *, int *, double *); } #endif -#endif // THIRD_PARTY_FFT2D_FFT_H__ +#endif // FFT2D_FFT_H__ diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 2727fa5efe..8e1dd8a54f 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -236,7 +236,7 @@ def _cudnn_install_basedir(repository_ctx): return cudnn_install_path -def _matches_version(environ_version, detected_version): +def matches_version(environ_version, detected_version): """Checks whether the user-specified version matches the detected version. 
This function performs a weak matching so that if the user specifies only the @@ -317,7 +317,7 @@ def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value): environ_version = "" if _TF_CUDA_VERSION in repository_ctx.os.environ: environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip() - if environ_version and not _matches_version(environ_version, full_version): + if environ_version and not matches_version(environ_version, full_version): auto_configure_fail( ("CUDA version detected from nvcc (%s) does not match " + "TF_CUDA_VERSION (%s)") % (full_version, environ_version)) @@ -338,35 +338,49 @@ _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR" _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL" -def _find_cuda_define(repository_ctx, cudnn_header_dir, define): - """Returns the value of a #define in cudnn.h +def find_cuda_define(repository_ctx, header_dir, header_file, define): + """Returns the value of a #define in a header file. - Greps through cudnn.h and returns the value of the specified #define. If the - #define is not found, then raise an error. + Greps through a header file and returns the value of the specified #define. + If the #define is not found, then raise an error. Args: repository_ctx: The repository context. - cudnn_header_dir: The directory containing the cuDNN header. + header_dir: The directory containing the header file. + header_file: The header file name. define: The #define to search for. Returns: - The value of the #define found in cudnn.h. + The value of the #define found in the header. """ - # Confirm location of cudnn.h and grep for the line defining CUDNN_MAJOR. - cudnn_h_path = repository_ctx.path("%s/cudnn.h" % cudnn_header_dir) - if not cudnn_h_path.exists: - auto_configure_fail("Cannot find cudnn.h at %s" % str(cudnn_h_path)) - result = repository_ctx.execute(["grep", "--color=never", "-E", define, str(cudnn_h_path)]) + # Confirm location of the header and grep for the line defining the macro. 
+ h_path = repository_ctx.path("%s/%s" % (header_dir, header_file)) + if not h_path.exists: + auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path))) + result = repository_ctx.execute( + # Grep one more lines as some #defines are splitted into two lines. + ["grep", "--color=never", "-A1", "-E", define, str(h_path)]) if result.stderr: - auto_configure_fail("Error reading %s: %s" % - (result.stderr, str(cudnn_h_path))) + auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr)) - # Parse the cuDNN major version from the line defining CUDNN_MAJOR - lines = result.stdout.splitlines() - if len(lines) == 0 or lines[0].find(define) == -1: + # Parse the version from the line defining the macro. + if result.stdout.find(define) == -1: auto_configure_fail("Cannot find line containing '%s' in %s" % - (define, str(cudnn_h_path))) - return lines[0].replace(define, "").strip() + (define, h_path)) + version = result.stdout + # Remove the new line and '\' character if any. + version = version.replace("\\", " ") + version = version.replace("\n", " ") + version = version.replace(define, "").lstrip() + # Remove the code after the version number. 
+ version_end = version.find(" ") + if version_end != -1: + if version_end == 0: + auto_configure_fail( + "Cannot extract the version from line containing '%s' in %s" % + (define, str(h_path))) + version = version[:version_end].strip() + return version def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): @@ -382,12 +396,12 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): """ cudnn_header_dir = _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir) - major_version = _find_cuda_define(repository_ctx, cudnn_header_dir, - _DEFINE_CUDNN_MAJOR) - minor_version = _find_cuda_define(repository_ctx, cudnn_header_dir, - _DEFINE_CUDNN_MINOR) - patch_version = _find_cuda_define(repository_ctx, cudnn_header_dir, - _DEFINE_CUDNN_PATCHLEVEL) + major_version = find_cuda_define( + repository_ctx, cudnn_header_dir, "cudnn.h", _DEFINE_CUDNN_MAJOR) + minor_version = find_cuda_define( + repository_ctx, cudnn_header_dir, "cudnn.h", _DEFINE_CUDNN_MINOR) + patch_version = find_cuda_define( + repository_ctx, cudnn_header_dir, "cudnn.h", _DEFINE_CUDNN_PATCHLEVEL) full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not @@ -395,7 +409,7 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): environ_version = "" if _TF_CUDNN_VERSION in repository_ctx.os.environ: environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip() - if environ_version and not _matches_version(environ_version, full_version): + if environ_version and not matches_version(environ_version, full_version): cudnn_h_path = repository_ctx.path("%s/include/cudnn.h" % cudnn_install_basedir) auto_configure_fail( @@ -427,7 +441,7 @@ def _compute_capabilities(repository_ctx): return capabilities -def _cpu_value(repository_ctx): +def get_cpu_value(repository_ctx): """Returns the name of the host operating system. 
Args: @@ -447,7 +461,7 @@ def _cpu_value(repository_ctx): def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" - return _cpu_value(repository_ctx) == "Windows" + return get_cpu_value(repository_ctx) == "Windows" def _lib_name(lib, cpu_value, version="", static=False): """Constructs the platform-specific name of a library. @@ -582,11 +596,8 @@ def _find_libs(repository_ctx, cuda_config): cuda_config: The CUDA config as returned by _get_cuda_config Returns: - Map of library names to structs of filename and path as returned by - _find_cuda_lib and _find_cupti_lib. + Map of library names to structs of filename and path. """ - cudnn_version = cuda_config.cudnn_version - cudnn_ext = ".%s" % cudnn_version if cudnn_version else "" cpu_value = cuda_config.cpu_value return { "cuda": _find_cuda_lib("cuda", repository_ctx, cpu_value, cuda_config.cuda_toolkit_path), @@ -611,7 +622,7 @@ def _find_libs(repository_ctx, cuda_config): "cudnn": _find_cuda_lib( "cudnn", repository_ctx, cpu_value, cuda_config.cudnn_install_basedir, cuda_config.cudnn_version), - "cupti": _find_cupti_lib(repository_ctx, cuda_config), + "cupti": _find_cupti_lib(repository_ctx, cuda_config) } @@ -654,7 +665,7 @@ def _get_cuda_config(repository_ctx): compute_capabilities: A list of the system's CUDA compute capabilities. cpu_value: The name of the host operating system. """ - cpu_value = _cpu_value(repository_ctx) + cpu_value = get_cpu_value(repository_ctx) cuda_toolkit_path = _cuda_toolkit_path(repository_ctx) cuda_version = _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value) cudnn_install_basedir = _cudnn_install_basedir(repository_ctx) @@ -712,13 +723,13 @@ error_gpu_disabled() def _create_dummy_repository(repository_ctx): - cpu_value = _cpu_value(repository_ctx) + cpu_value = get_cpu_value(repository_ctx) # Set up BUILD file for cuda/. 
_tpl(repository_ctx, "cuda:build_defs.bzl", { "%{cuda_is_configured}": "False", - "%{cuda_extra_copts}": "[]" + "%{cuda_extra_copts}": "[]", }) _tpl(repository_ctx, "cuda:BUILD", { @@ -805,8 +816,8 @@ def _norm_path(path): return path -def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, - src_files = [], dest_files = []): +def symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, + src_files = [], dest_files = []): """Returns a genrule to symlink(or copy if on Windows) a set of files. If src_dir is passed, files will be read from the given directory; otherwise @@ -913,11 +924,11 @@ def _create_local_cuda_repository(repository_ctx): # cuda_toolkit_path cuda_toolkit_path = cuda_config.cuda_toolkit_path cuda_include_path = cuda_toolkit_path + "/include" - genrules = [_symlink_genrule_for_dir(repository_ctx, + genrules = [symlink_genrule_for_dir(repository_ctx, cuda_include_path, "cuda/include", "cuda-include")] - genrules.append(_symlink_genrule_for_dir(repository_ctx, + genrules.append(symlink_genrule_for_dir(repository_ctx, cuda_toolkit_path + "/nvvm", "cuda/nvvm", "cuda-nvvm")) - genrules.append(_symlink_genrule_for_dir(repository_ctx, + genrules.append(symlink_genrule_for_dir(repository_ctx, cuda_toolkit_path + "/extras/CUPTI/include", "cuda/extras/CUPTI/include", "cuda-extras")) @@ -927,15 +938,15 @@ def _create_local_cuda_repository(repository_ctx): for lib in cuda_libs.values(): cuda_lib_src.append(lib.path) cuda_lib_dest.append("cuda/lib/" + lib.file_name) - genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "cuda-lib", - cuda_lib_src, cuda_lib_dest)) + genrules.append(symlink_genrule_for_dir(repository_ctx, None, "", "cuda-lib", + cuda_lib_src, cuda_lib_dest)) - # Set up the symbolic links for cudnn if cudnn was was not installed to + # Set up the symbolic links for cudnn if cndnn was not installed to # CUDA_TOOLKIT_PATH. 
included_files = _read_dir(repository_ctx, cuda_include_path).replace( cuda_include_path, '').splitlines() if '/cudnn.h' not in included_files: - genrules.append(_symlink_genrule_for_dir(repository_ctx, None, + genrules.append(symlink_genrule_for_dir(repository_ctx, None, "cuda/include/", "cudnn-include", [cudnn_header_dir + "/cudnn.h"], ["cudnn.h"])) else: @@ -952,7 +963,6 @@ def _create_local_cuda_repository(repository_ctx): "%{cuda_is_configured}": "True", "%{cuda_extra_copts}": _compute_cuda_extra_copts( repository_ctx, cuda_config.compute_capabilities), - }) _tpl(repository_ctx, "cuda:BUILD", { diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 527a08c4b3..ca2d38d687 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -34,6 +34,10 @@ libjpegturbo_copts = select({ "-mfloat-abi=softfp", "-fprefetch-loop-arrays", ], + ":linux_ppc64le": [ + "-mcpu=power8", + "-mtune=power8", + ], "//conditions:default": [], }) @@ -123,11 +127,51 @@ cc_library( ":k8": [":simd_x86_64"], ":armeabi-v7a": [":simd_armv7a"], ":arm64-v8a": [":simd_armv8a"], + ":linux_ppc64le": [":simd_altivec"], "//conditions:default": [":simd_none"], }), ) cc_library( + name = "simd_altivec", + srcs = [ + "jchuff.h", + "jconfig.h", + "jdct.h", + "jerror.h", + "jinclude.h", + "jmorecfg.h", + "jpegint.h", + "jpeglib.h", + "jsimd.h", + "jsimddct.h", + "simd/jsimd.h", + "simd/jccolor-altivec.c", + "simd/jcgray-altivec.c", + "simd/jcsample-altivec.c", + "simd/jdcolor-altivec.c", + "simd/jdmerge-altivec.c", + "simd/jdsample-altivec.c", + "simd/jfdctfst-altivec.c", + "simd/jfdctint-altivec.c", + "simd/jidctfst-altivec.c", + "simd/jidctint-altivec.c", + "simd/jquanti-altivec.c", + "simd/jsimd_powerpc.c", + "simd/jsimd_altivec.h", + "simd/jcsample.h", + ], + hdrs = [ + "simd/jdmrgext-altivec.c", # should have been named .inc + "simd/jccolext-altivec.c", # should have been named .inc + "simd/jcgryext-altivec.c", # should have been named .inc + 
"simd/jdcolext-altivec.c", # should have been named .inc + ], + copts = libjpegturbo_copts, + nocopts = libjpegturbo_nocopts, +) + +cc_library( name = "simd_x86_64", srcs = [ "jchuff.h", @@ -219,7 +263,7 @@ genrule( " -o $$out" + " $$(dirname $(location simd/jdct.inc))/$$(basename $${out%.o}.asm)\n" + "done", - tools = ["@nasm//:nasm"], + tools = ["@nasm"], ) cc_library( @@ -381,6 +425,7 @@ genrule( ":k8": "cp $(location jconfig_nowin_simd.h) $@", ":armeabi-v7a": "cp $(location jconfig_nowin_simd.h) $@", ":arm64-v8a": "cp $(location jconfig_nowin_simd.h) $@", + ":linux_ppc64le": "cp $(location jconfig_nowin_simd.h) $@", "//conditions:default": "cp $(location jconfig_nowin_nosimd.h) $@", }), ) @@ -498,3 +543,9 @@ config_setting( name = "windows_msvc", values = {"cpu": "x64_windows_msvc"}, ) + +config_setting( + name = "linux_ppc64le", + values = {"cpu": "ppc"}, + +) diff --git a/third_party/swig.BUILD b/third_party/swig.BUILD index d698fa934b..f2f647401b 100644 --- a/third_party/swig.BUILD +++ b/third_party/swig.BUILD @@ -89,7 +89,7 @@ cc_binary( ], output_licenses = ["unencumbered"], visibility = ["//visibility:public"], - deps = ["@pcre//:pcre"], + deps = ["@pcre"], ) filegroup( diff --git a/third_party/tensorrt/BUILD.tpl b/third_party/tensorrt/BUILD.tpl index a8e52d13d3..6cb7db7e90 100644 --- a/third_party/tensorrt/BUILD.tpl +++ b/third_party/tensorrt/BUILD.tpl @@ -1,38 +1,69 @@ -# -*- python -*- +# NVIDIA TensorRT +# A high-performance deep learning inference optimizer and runtime. 
-licenses(["notice"]) +licenses(["notice"]) exports_files(["LICENSE"]) load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda") -config_setting( - name = "trt_enabled", - define_values = { - "using_tensorrt":"true" - }, +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "tensorrt_headers", + hdrs = [%{tensorrt_headers}], + includes = [ + "include", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "nv_infer", + srcs = [%{nv_infer}], + data = [%{nv_infer}], + includes = [ + "include", + ], + copts= cuda_default_copts(), + deps = [ + "@local_config_cuda//cuda:cuda", + ":tensorrt_headers", + ], + linkstatic = 1, visibility = ["//visibility:public"], ) cc_library( - name = "tensorrt", - srcs =[%{tensorrt_lib}], - hdrs = ["include/NvInfer.h", - "include/NvUtils.h", + name = "nv_infer_plugin", + srcs = [%{nv_infer_plugin}], + data = [%{nv_infer_plugin}], + includes = [ + "include", ], copts= cuda_default_copts(), - deps =["@local_config_cuda//cuda:cuda", - "@local_config_cuda//cuda:cudnn",], + deps = [ + "@local_config_cuda//cuda:cuda", + ":nv_infer", + ":tensorrt_headers", + ], linkstatic = 1, - #include_prefix="include/", - includes=["include/"], - visibility = ["//visibility:public"], + visibility = ["//visibility:public"], ) -%{tensorrt_genrules} +cc_library( + name = "nv_parsers", + srcs = [%{nv_parsers}], + data = [%{nv_parsers}], + includes = [ + "include", + ], + copts= cuda_default_copts(), + deps = [ + ":tensorrt_headers", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) -# filegroup( -# name = "%{tensorrt_lib}", -# srcs = ["%{tensorrt_lib}"], -# visibility = ["//visibility:public"], -# ) +%{tensorrt_genrules}
\ No newline at end of file diff --git a/third_party/tensorrt/build_defs.bzl.tpl b/third_party/tensorrt/build_defs.bzl.tpl index 18f354ee5a..8a89b59bc8 100644 --- a/third_party/tensorrt/build_defs.bzl.tpl +++ b/third_party/tensorrt/build_defs.bzl.tpl @@ -1,18 +1,7 @@ -# -*- python -*- -""" -template file for trt functions +# Build configurations for TensorRT. -""" - -def is_trt_enabled(): - return %{trt_configured} - -def if_trt(if_true,if_false=[]): - # if is_trt_enabled(): - # return if_true - # return if_false - - return select({ - "@local_config_tensorrt//:trt_enabled":if_true, - "//conditions:default":if_false, - }) +def if_tensorrt(if_true, if_false=[]): + """Tests whether TensorRT was enabled during the configure process.""" + if %{tensorrt_is_configured}: + return if_true + return if_false
\ No newline at end of file diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl new file mode 100644 index 0000000000..8aa0f28f39 --- /dev/null +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -0,0 +1,224 @@ +# -*- Python -*- +"""Repository rule for TensorRT configuration. + +`tensorrt_configure` depends on the following environment variables: + + * `TF_TENSORRT_VERSION`: The TensorRT libnvinfer version. + * `TENSORRT_INSTALL_PATH`: The installation path of the TensorRT library. +""" + +load( + "//third_party/gpus:cuda_configure.bzl", + "auto_configure_fail", + "get_cpu_value", + "find_cuda_define", + "matches_version", + "symlink_genrule_for_dir", +) + +_TENSORRT_INSTALL_PATH = "TENSORRT_INSTALL_PATH" +_TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION" + +_TF_TENSORRT_LIBS = ["nvinfer", "nvinfer_plugin", "nvparsers"] +_TF_TENSORRT_HEADERS = [ + "NvInfer.h", "NvInferPlugin.h", "NvCaffeParser.h", "NvUffParser.h", + "NvUtils.h" +] + +_DEFINE_TENSORRT_SONAME_MAJOR = "#define NV_TENSORRT_SONAME_MAJOR" +_DEFINE_TENSORRT_SONAME_MINOR = "#define NV_TENSORRT_SONAME_MINOR" +_DEFINE_TENSORRT_SONAME_PATCH = "#define NV_TENSORRT_SONAME_PATCH" + + +def _headers_exist(repository_ctx, path): + """Returns whether all TensorRT header files could be found in 'path'. + + Args: + repository_ctx: The repository context. + path: The TensorRT include path to check. + + Returns: + True if all TensorRT header files can be found in the path. + """ + for h in _TF_TENSORRT_HEADERS: + if not repository_ctx.path("%s/%s" % (path, h)).exists: + return False + return True + + +def _find_trt_header_dir(repository_ctx, trt_install_path): + """Returns the path to the directory containing headers of TensorRT. + + Args: + repository_ctx: The repository context. + trt_install_path: The TensorRT library install directory. + + Returns: + The path of the directory containing the TensorRT header. 
+ """ + if trt_install_path == "/usr/lib/x86_64-linux-gnu": + path = "/usr/include/x86_64-linux-gnu" + if _headers_exist(repository_ctx, path): + return path + path = str(repository_ctx.path("%s/../include" % trt_install_path).realpath) + if _headers_exist(repository_ctx, path): + return path + auto_configure_fail( + "Cannot find NvInfer.h with TensorRT install path %s" % trt_install_path) + + +def _trt_lib_version(repository_ctx, trt_install_path): + """Detects the library (e.g. libnvinfer) version of TensorRT. + + Args: + repository_ctx: The repository context. + trt_install_path: The TensorRT library install directory. + + Returns: + A string containing the library version of TensorRT. + """ + trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path) + major_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h", + _DEFINE_TENSORRT_SONAME_MAJOR) + minor_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h", + _DEFINE_TENSORRT_SONAME_MINOR) + patch_version = find_cuda_define(repository_ctx, trt_header_dir, "NvInfer.h", + _DEFINE_TENSORRT_SONAME_PATCH) + full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) + environ_version = repository_ctx.os.environ[_TF_TENSORRT_VERSION].strip() + if not matches_version(environ_version, full_version): + auto_configure_fail( + ("TensorRT library version detected from %s/%s (%s) does not match " + + "TF_TENSORRT_VERSION (%s). To fix this rerun configure again.") % + (trt_header_dir, "NvInfer.h", full_version, environ_version)) + return environ_version + + +def _find_trt_libs(repository_ctx, trt_install_path, trt_lib_version): + """Finds the given TensorRT library on the system. + + Adapted from code contributed by Sami Kama (https://github.com/samikama). + + Args: + repository_ctx: The repository context. + trt_install_path: The TensorRT library installation directory. + trt_lib_version: The version of TensorRT library files as returned + by _trt_lib_version. 
+ + Returns: + Map of library names to structs with the following fields: + src_file_path: The full path to the library found on the system. + dst_file_name: The basename of the target library. + """ + objdump = repository_ctx.which("objdump") + result = {} + for lib in _TF_TENSORRT_LIBS: + dst_file_name = "lib%s.so.%s" % (lib, trt_lib_version) + src_file_path = repository_ctx.path("%s/%s" % (trt_install_path, + dst_file_name)) + if not src_file_path.exists: + auto_configure_fail( + "Cannot find TensorRT library %s" % str(src_file_path)) + if objdump != None: + objdump_out = repository_ctx.execute([objdump, "-p", str(src_file_path)]) + for line in objdump_out.stdout.splitlines(): + if "SONAME" in line: + dst_file_name = line.strip().split(" ")[-1] + result.update({ + lib: + struct( + dst_file_name=dst_file_name, + src_file_path=str(src_file_path.realpath)) + }) + return result + + +def _tpl(repository_ctx, tpl, substitutions): + repository_ctx.template(tpl, Label("//third_party/tensorrt:%s.tpl" % tpl), + substitutions) + + +def _create_dummy_repository(repository_ctx): + """Create a dummy TensorRT repository.""" + _tpl(repository_ctx, "build_defs.bzl", {"%{tensorrt_is_configured}": "False"}) + substitutions = { + "%{tensorrt_genrules}": "", + "%{tensorrt_headers}": "", + } + for lib in _TF_TENSORRT_LIBS: + k = "%%{%s}" % lib.replace("nv", "nv_") + substitutions.update({k: ""}) + _tpl(repository_ctx, "BUILD", substitutions) + + +def _tensorrt_configure_impl(repository_ctx): + """Implementation of the tensorrt_configure repository rule.""" + if _TENSORRT_INSTALL_PATH not in repository_ctx.os.environ: + _create_dummy_repository(repository_ctx) + return + + if (get_cpu_value(repository_ctx) != "Linux"): + auto_configure_fail("TensorRT is supported only on Linux.") + if _TF_TENSORRT_VERSION not in repository_ctx.os.environ: + auto_configure_fail("TensorRT library (libnvinfer) version is not set.") + trt_install_path = 
repository_ctx.os.environ[_TENSORRT_INSTALL_PATH].strip() + if not repository_ctx.path(trt_install_path).exists: + auto_configure_fail( + "Cannot find TensorRT install path %s." % trt_install_path) + + # Set up the symbolic links for the library files. + trt_lib_version = _trt_lib_version(repository_ctx, trt_install_path) + trt_libs = _find_trt_libs(repository_ctx, trt_install_path, trt_lib_version) + trt_lib_src = [] + trt_lib_dest = [] + for lib in trt_libs.values(): + trt_lib_src.append(lib.src_file_path) + trt_lib_dest.append(lib.dst_file_name) + genrules = [ + symlink_genrule_for_dir(repository_ctx, None, "tensorrt/lib/", + "tensorrt_lib", trt_lib_src, trt_lib_dest) + ] + + # Set up the symbolic links for the header files. + trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path) + src_files = [ + "%s/%s" % (trt_header_dir, header) for header in _TF_TENSORRT_HEADERS + ] + dest_files = _TF_TENSORRT_HEADERS + genrules.append( + symlink_genrule_for_dir(repository_ctx, None, "tensorrt/include/", + "tensorrt_include", src_files, dest_files)) + + # Set up config file. + _tpl(repository_ctx, "build_defs.bzl", {"%{tensorrt_is_configured}": "True"}) + + # Set up BUILD file. + substitutions = { + "%{tensorrt_genrules}": "\n".join(genrules), + "%{tensorrt_headers}": '":tensorrt_include"', + } + for lib in _TF_TENSORRT_LIBS: + k = "%%{%s}" % lib.replace("nv", "nv_") + v = '"tensorrt/lib/%s"' % trt_libs[lib].dst_file_name + substitutions.update({k: v}) + _tpl(repository_ctx, "BUILD", substitutions) + + +tensorrt_configure = repository_rule( + implementation=_tensorrt_configure_impl, + environ=[ + _TENSORRT_INSTALL_PATH, + _TF_TENSORRT_VERSION, + ], +) +"""Detects and configures the local CUDA toolchain. + +Add the following to your WORKSPACE FILE: + +```python +tensorrt_configure(name = "local_config_tensorrt") +``` + +Args: + name: A unique name for this workspace rule. 
+""" diff --git a/third_party/toolchains/clang6/BUILD b/third_party/toolchains/clang6/BUILD new file mode 100644 index 0000000000..ffd0fb0cdc --- /dev/null +++ b/third_party/toolchains/clang6/BUILD @@ -0,0 +1 @@ +package(default_visibility = ["//visibility:public"]) diff --git a/third_party/toolchains/clang6/CROSSTOOL.tpl b/third_party/toolchains/clang6/CROSSTOOL.tpl new file mode 100644 index 0000000000..6b7e5a8808 --- /dev/null +++ b/third_party/toolchains/clang6/CROSSTOOL.tpl @@ -0,0 +1,587 @@ +major_version: "v1" +minor_version: "llvm:6.0.0" +default_target_cpu: "k8" + +default_toolchain { + cpu: "k8" + toolchain_identifier: "k8-clang-6.0-cxx-4.8-linux-gnu" +} + +toolchain { + compiler: "clang6" # bazel build --compiler=clang6 + target_cpu: "k8" # bazel build --cpu=k8 + target_libc: "GLIBC_2.19" # bazel build --glibc=GLIBC_2.19 + + abi_libc_version: "2.19" + abi_version: "gcc-4.8-cxx11" + builtin_sysroot: "" + cc_target_os: "linux-gnu" + default_python_version: "python2.7" + dynamic_runtimes_filegroup: "dynamic-runtime-libs-k8" + host_system_name: "x86_64-unknown-linux-gnu" + needsPic: true + static_runtimes_filegroup: "static-runtime-libs-k8" + supports_embedded_runtimes: true + supports_fission: true + supports_gold_linker: true + supports_incremental_linker: true + supports_interface_shared_objects: true + supports_normalizing_ar: true + supports_start_end_lib: true + supports_thin_archives: true + target_system_name: "x86_64-unknown-linux-gnu" + toolchain_identifier: "k8-clang-6.0-cxx-4.8-linux-gnu" + + tool_path { name: "ar" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-ar" } + tool_path { name: "as" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-as" } + tool_path { name: "compat-ld" path: "%package(@local_config_clang6//clang6)%/llvm/bin/ld.lld" } + tool_path { name: "cpp" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-cpp" } + tool_path { name: "dwp" path: 
"%package(@local_config_clang6//clang6)%/llvm/bin/llvm-dwp" } + tool_path { name: "gcc" path: "%package(@local_config_clang6//clang6)%/llvm/bin/clang" } + tool_path { name: "gcov" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-cov" } + tool_path { name: "ld" path: "%package(@local_config_clang6//clang6)%/llvm/bin/ld.lld" } + tool_path { name: "llvm-profdata" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-profdata" } + tool_path { name: "nm" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-nm" } + tool_path { name: "objcopy" path: "%package(@local_config_clang6//clang6)%/llvm/bin/llvm-objcopy" } + tool_path { name: "objdump" path: "%package(@local_config_clang6//clang6)%/sbin/objdump" } + tool_path { name: "strip" path: "%package(@local_config_clang6//clang6)%/sbin/strip" } + + unfiltered_cxx_flag: "-no-canonical-prefixes" + + # Make C++ compilation deterministic. Use linkstamping instead of these + # compiler symbols. + unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" + unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" + + objcopy_embed_flag: "-I" + objcopy_embed_flag: "binary" + + # This action_config makes features flags propagate + # to CC_FLAGS for genrules, and eventually skylark. + action_config { + action_name: "cc-flags-make-variable" + config_name: "cc-flags-make-variable" + } + + # Security hardening on by default. + # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. + # We need to undef it before redefining it as some distributions now have + # it enabled by default. + compiler_flag: "-U_FORTIFY_SOURCE" + compiler_flag: "-D_FORTIFY_SOURCE=1" + compiler_flag: "-fstack-protector" + linker_flag: "-Wl,-z,relro,-z,now" + + # This adds a little bit more durability to our Clang build. 
+ # + # At the moment, this should only be needed for: + # - add_boringssl_s390x.patch: --Wa,--noexecstack + # + # Folks who do maintenance work on TF Bazel Clang should consider + # commenting out these lines, while doing that work, to gain a better + # understanding of what the intersection of support looks like between GCC + # and Clang. Please note that Bazel does not support + # -Xclang-only / -Xgcc-only. + compiler_flag: "-Wno-unknown-warning-option" + compiler_flag: "-Wno-unused-command-line-argument" + compiler_flag: "-Wno-ignored-optimization-argument" + + #### Common compiler options. #### + compiler_flag: "-D_REENTRANT" + compiler_flag: "-D__STDC_FORMAT_MACROS" + compiler_flag: "-DSUPPRESS_USE_FILE_OFFSET64" + compiler_flag: "-Wall" + compiler_flag: "-Wformat-security" + compiler_flag: "-Wframe-larger-than=16384" + compiler_flag: "-Wno-char-subscripts" + compiler_flag: "-Wno-error=deprecated-declarations" + compiler_flag: "-Wno-uninitialized" + compiler_flag: "-Wno-sign-compare" + compiler_flag: "-Wno-strict-overflow" + compiler_flag: "-Wno-unused-function" + compiler_flag: "-fdiagnostics-show-option" + compiler_flag: "-fmessage-length=0" + compiler_flag: "-fno-exceptions" + compiler_flag: "-fno-omit-frame-pointer" + compiler_flag: "-fno-strict-aliasing" + compiler_flag: "-fno-use-init-array" + compiler_flag: "-funsigned-char" + compiler_flag: "-gmlt" + cxx_flag: "-Wno-deprecated" + cxx_flag: "-Wno-invalid-offsetof" # Needed for protobuf code (2017-11-07) + cxx_flag: "-fshow-overloads=best" + compiler_flag: "-Wthread-safety-analysis" + + # Python extensions unfortunately make this go wild. + compiler_flag: "-Wno-writable-strings" + + # GCC's warning produces too many false positives: + cxx_flag: "-Woverloaded-virtual" + cxx_flag: "-Wnon-virtual-dtor" + + # Enable coloring even if there's no attached terminal. Bazel removes the + # escape sequences if --nocolor is specified. This isn't supported by gcc + # on Ubuntu 14.04. 
+ compiler_flag: "-fcolor-diagnostics" + + # Disable some broken warnings from Clang. + compiler_flag: "-Wno-ambiguous-member-template" + compiler_flag: "-Wno-pointer-sign" + + # These warnings have a low signal to noise ratio. + compiler_flag: "-Wno-reserved-user-defined-literal" + compiler_flag: "-Wno-return-type-c-linkage" + compiler_flag: "-Wno-invalid-source-encoding" + + # Per default we switch off any layering related warnings. + compiler_flag: "-Wno-private-header" + + # Clang-specific warnings that we explicitly enable for TensorFlow. Some of + # these aren't on by default, or under -Wall, or are subsets of warnings + # turned off above. + compiler_flag: "-Wfloat-overflow-conversion" + compiler_flag: "-Wfloat-zero-conversion" + compiler_flag: "-Wfor-loop-analysis" + compiler_flag: "-Wgnu-redeclared-enum" + compiler_flag: "-Winfinite-recursion" + compiler_flag: "-Wliteral-conversion" + compiler_flag: "-Wself-assign" + compiler_flag: "-Wstring-conversion" + compiler_flag: "-Wtautological-overlap-compare" + compiler_flag: "-Wunused-comparison" + compiler_flag: "-Wvla" + cxx_flag: "-Wdeprecated-increment-bool" + + # Clang code-generation flags for performance optimization. + compiler_flag: "-faligned-allocation" + compiler_flag: "-fnew-alignment=8" + + # Clang defaults to C99 while GCC defaults to C89. GCC plugins are written in + # C89 and don't have a BUILD rule we could add a copts flag to. 
+ gcc_plugin_compiler_flag: "-std=gnu89" + + compilation_mode_flags { + mode: FASTBUILD + } + + compilation_mode_flags { + mode: DBG + compiler_flag: "-g" + } + + compilation_mode_flags { + mode: OPT + compiler_flag: "-g0" + compiler_flag: "-fdebug-types-section" + compiler_flag: "-DNDEBUG" + compiler_flag: "-fno-split-dwarf-inlining" + compiler_flag: "-Os" + compiler_flag: "-fexperimental-new-pass-manager" + compiler_flag: "-fdebug-info-for-profiling" + compiler_flag: "-ffunction-sections" + compiler_flag: "-fdata-sections" + linker_flag: "-Wl,--gc-sections" + linker_flag: "-Wl,-z,relro,-z,now" + } + + # Features indicating whether this is a host compile or not. Exactly one of + # these will be implicitly provided by bazel. + feature { name: "host" } + feature { name: "nonhost" } + + # Features indicating which compiler will be used for code generation. + feature { + name: "llvm_codegen" + provides: "codegen" + enabled: true + } + + # Features for compilation modes. Exactly one of these will be implicitly + # provided by bazel. + feature { name: "fastbuild" } + feature { name: "dbg" } + feature { name: "opt" } + + # Features controlling the C++ language mode. 
+ feature { + name: "c++11" + provides: "c++std" + flag_set { + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "linkstamp-compile" + flag_group { + flag: "-nostdinc++" + flag: "-std=c++11" + flag: "-Wc++14-extensions" + flag: "-Wc++2a-extensions" + flag: "-Wno-binary-literal" + } + } + } + feature { + name: "c++14" + provides: "c++std" + flag_set { + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "linkstamp-compile" + flag_group { + flag: "-nostdinc++" + flag: "-std=c++14" + flag: "-Wc++11-compat" + flag: "-Wno-c++11-compat-binary-literal" + flag: "-Wc++2a-extensions" + } + } + } + feature { + name: "c++17" + provides: "c++std" + flag_set { + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "linkstamp-compile" + flag_group { + flag: "-nostdinc++" + flag: "-std=c++17" + flag: "-Wc++11-compat" + flag: "-Wno-c++11-compat-binary-literal" + flag: "-Wc++2a-extensions" + } + } + } + feature { + name: "c++2a" + provides: "c++std" + flag_set { + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "linkstamp-compile" + flag_group { + flag: "-nostdinc++" + flag: "-std=c++2a" + flag: "-Wc++11-compat" + flag: "-Wno-c++11-compat-binary-literal" + } + } + } + feature { + name: "c++default" + enabled: true + flag_set { + # Provide the c++11 flags if no standard is selected + with_feature { + not_feature: "c++11" + not_feature: "c++14" + not_feature: "c++17" + not_feature: "c++2a" + } + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "linkstamp-compile" + flag_group { + flag: "-nostdinc++" + flag: "-std=c++11" + flag: "-Wc++14-extensions" + flag: "-Wc++2a-extensions" + flag: 
"-Wno-binary-literal" + } + } + } + + feature { + name: "use_compiler_rt" + requires { feature: "llvm_codegen" } + # TODO(saugustine): At the moment, "use_compiler_rt" also + # requires "linking_mode_flags { mode: FULLY_STATIC" ... }, + # but that isn't a feature. We should probably convert it. + flag_set { + action: "c++-link" + action: "c++-link-interface-dynamic-library" + action: "c++-link-dynamic-library" + action: "c++-link-executable" + # "link" is a misnomer for these actions. They are really just + # invocations of ar. + #action: "c++-link-pic-static-library" + #action: "c++-link-static-library" + #action: "c++-link-alwayslink-static-library" + #action: "c++-link-pic-static-library" + #action: "c++-link-alwayslink-pic-static-library" + flag_group { + flag: "-rtlib=compiler-rt" + flag: "-lunwind" + } + } + } + + feature { + name: "pie" + flag_set { + action: "assemble" + action: "preprocess-assemble" + action: "c-compile" + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "c++-module-codegen" + action: "cc-flags-make-variable" + action: "lto-backend" + action: "linkstamp-compile" + flag_group { + flag: "-mpie-copy-relocations" + flag: "-fPIE" + } + } + flag_set { + action: "cc-flags-make-variable" + action: "c++-link-executable" + flag_group { + flag: "-pie" + } + } + } + + # Pic must appear after pie, because pic may need to override pie, and bazel + # turns it on selectively. These don't interact with other options. + # + # TODO: In practice, normal vs pic vs pie is a ternary mode. We should + # implement it that way. This will require changes to bazel, which only + # calculates whether or not pic is needed, not pie. + # + # NOTE: Bazel might make this all a moot point. 
+ feature { + name: "pic" + flag_set { + action: "assemble" + action: "preprocess-assemble" + action: "c-compile" + action: "c++-compile" + action: "c++-module-codegen" + action: "c++-module-compile" + action: "linkstamp-compile" + expand_if_all_available: "pic" + flag_group { + flag: "-fPIC" + } + } + } + + feature { + name: "gold" + enabled: true + flag_set { + action: "c++-link-executable" + action: "c++-link-dynamic-library" + action: "c++-link-interface-dynamic-library" + flag_group { + expand_if_none_available: "lto" + flag: "-fuse-ld=gold" + } + } + } + + # This is great if you want linking TensorFlow to take ten minutes. + feature { + name: "lto" + requires { feature: "nonhost" } + flag_set { + action: "c-compile" + action: "c++-compile" + flag_group { + flag: "-flto=thin" + } + } + flag_set { + action: "c++-link-executable" + action: "c++-link-dynamic-library" + action: "c++-link-interface-dynamic-library" + flag_group { + flag: "-flto=thin" + } + } + } + + feature { + name: "parse_headers" + flag_set { + action: "c++-header-parsing" + flag_group { + flag: "-xc++-header" + flag: "-fsyntax-only" + } + } + } + + feature { + name: "preprocess_headers" + flag_set { + action: "c++-header-preprocessing" + flag_group { + flag: "-xc++" + flag: "-E" + } + } + } + + feature { + name: "per_object_debug_info" + flag_set { + action: "c-compile" + action: "c++-compile" + action: "c++-module-codegen" + action: "assemble" + action: "preprocess-assemble" + action: "lto-backend" + flag_group { + flag: "-gsplit-dwarf" + flag: "-ggnu-pubnames" + } + } + flag_set { + action: "c++-link-executable" + action: "c++-link-dynamic-library" + action: "c++-link-interface-dynamic-library" + flag_group { + expand_if_all_available: "is_using_fission" + flag: "-Wl,--gdb-index" + } + } + } + + feature { + name: "xray" + requires { + feature: "llvm_codegen" + feature: "nonhost" + } + flag_set { + action: "c-compile" + action: "c++-compile" + action: "c++-header-parsing" + action: 
"c++-header-preprocessing" + action: "c++-module-compile" + action: "c++-link-interface-dynamic-library" + action: "c++-link-dynamic-library" + action: "c++-link-executable" + flag_group { + flag: "-fxray-instrument" + } + } + } + + feature { + name: "minimal_ubsan" + requires { feature: "llvm_codegen" } + flag_set { + action: "c-compile" + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "c++-module-codegen" + flag_group { + flag: "-fsanitize=return,returns-nonnull-attribute,vla-bound,unreachable,float-cast-overflow" + flag: "-fsanitize-trap=all" + flag: "-DUNDEFINED_BEHAVIOR_SANITIZER" + } + } + } + + feature { + name: "minimal_ubsan_enabled_by_default" + requires { + feature: "llvm_codegen" + feature: "fastbuild" + } + enabled: true + implies: "minimal_ubsan" + } + + cxx_builtin_include_directory: "%package(@local_config_clang6//clang6)%/llvm/lib/clang/6.0.0/include" + cxx_builtin_include_directory: "/usr/include" + + unfiltered_cxx_flag: "-cxx-isystem" + unfiltered_cxx_flag: "/usr/include/c++/4.8" + unfiltered_cxx_flag: "-cxx-isystem" + unfiltered_cxx_flag: "/usr/include/x86_64-linux-gnu/c++/4.8" + unfiltered_cxx_flag: "-isystem" + unfiltered_cxx_flag: "%package(@local_config_clang6//clang6)%/llvm/lib/clang/6.0.0/include" + unfiltered_cxx_flag: "-isystem" + unfiltered_cxx_flag: "/usr/include/x86_64-linux-gnu" + unfiltered_cxx_flag: "-isystem" + unfiltered_cxx_flag: "/usr/include" + + linker_flag: "-Wl,--build-id=md5" + linker_flag: "-Wl,--fatal-warnings" + linker_flag: "-Wl,--hash-style=gnu" + linker_flag: "-no-canonical-prefixes" + linker_flag: "--target=x86_64-unknown-linux-gnu" + + linker_flag: "-L/usr/lib/gcc/x86_64-linux-gnu/4.8" + + # This is the minimum x86 architecture TensorFlow supports. + compiler_flag: "-DARCH_K8" + compiler_flag: "-m64" + + # These are for Linux. 
+ ld_embed_flag: "-melf_x86_64" + linker_flag: "-Wl,--eh-frame-hdr" + linker_flag: "-Wl,-z,max-page-size=0x1000" + + # Google never uses the stack like a heap, e.g. alloca(), because tcmalloc + # and jemalloc are so fast. However copts=["$(STACK_FRAME_UNLIMITED)"] can be + # specified when that can't be the case. + make_variable { + name: "STACK_FRAME_UNLIMITED" + value: "-Wframe-larger-than=100000000 -Wno-vla" + } + + # These flags are for folks who build C/C++ code inside genrules. + make_variable { + name: "CC_FLAGS" + value: "-no-canonical-prefixes --target=x86_64-unknown-linux-gnu -fno-omit-frame-pointer -fno-tree-vrp -msse3" + } + + feature { + name: "copts" + flag_set { + expand_if_all_available: "copts" + action: "assemble" + action: "preprocess-assemble" + action: "c-compile" + action: "c++-compile" + action: "c++-header-parsing" + action: "c++-header-preprocessing" + action: "c++-module-compile" + action: "c++-module-codegen" + action: "lto-backend" + flag_group { + iterate_over: "copts" + flag: "%{copts}" + } + } + } + + # Please do not statically link libstdc++. This would probably lead to a lot + # of bloat since OpKernels need to use linkstatic=1 because b/27630669 and + # it could cause memory leaks since Python uses dlopen() on our libraries: + # https://stackoverflow.com/a/35015415 + linker_flag: "-lstdc++" + linker_flag: "-lm" + linker_flag: "-lpthread" + linker_flag: "-l:/lib/x86_64-linux-gnu/libc-2.19.so" +} diff --git a/third_party/toolchains/clang6/README.md b/third_party/toolchains/clang6/README.md new file mode 100644 index 0000000000..0c6be25a0e --- /dev/null +++ b/third_party/toolchains/clang6/README.md @@ -0,0 +1,101 @@ +# TensorFlow Bazel Clang + +This is a specialized toolchain that uses an old Debian with a new Clang that +can cross compile to any x86_64 microarchitecture. 
It's intended to build Linux +binaries that only require the following ABIs: + +- GLIBC_2.18 +- CXXABI_1.3.7 (GCC 4.8.3) +- GCC_4.2.0 + +Which are available on at least the following Linux platforms: + +- Ubuntu 14+ +- CentOS 7+ +- Debian 8+ +- SuSE 13.2+ +- Mint 17.3+ +- Manjaro 0.8.11 + +# System Install + +On Debian 8 (Jessie) Clang 6.0 can be installed as follows: + +```sh +cat >>/etc/apt/sources.list <<'EOF' +deb http://apt.llvm.org/jessie/ llvm-toolchain-jessie main +deb-src http://apt.llvm.org/jessie/ llvm-toolchain-jessie main +EOF +wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - +apt-key fingerprint |& grep '6084 F3CF 814B 57C1 CF12 EFD5 15CF 4D18 AF4F 7421' +apt-get update +apt-get install clang lld +``` + +# Bazel Configuration + +This toolchain can compile TensorFlow in 2m30s on a 96-core Skylake GCE VM if +the following `.bazelrc` settings are added: + +``` +startup --host_jvm_args=-Xmx30G +startup --host_jvm_args=-Xms30G +startup --host_jvm_args=-XX:MaxNewSize=3g +startup --host_jvm_args=-XX:-UseAdaptiveSizePolicy +startup --host_jvm_args=-XX:+UseConcMarkSweepGC +startup --host_jvm_args=-XX:TargetSurvivorRatio=70 +startup --host_jvm_args=-XX:SurvivorRatio=6 +startup --host_jvm_args=-XX:+UseCMSInitiatingOccupancyOnly +startup --host_jvm_args=-XX:CMSFullGCsBeforeCompaction=1 +startup --host_jvm_args=-XX:CMSInitiatingOccupancyFraction=75 + +build --jobs=100 +build --local_resources=200000,100,100 +build --crosstool_top=@local_config_clang6//clang6 +build --noexperimental_check_output_files +build --nostamp +build --config=opt +build --noexperimental_check_output_files +build --copt=-march=native +build --host_copt=-march=native +``` + +# x86_64 Microarchitectures + +## Intel CPU Line + +- 2003 P6 M SSE SSE2 +- 2004 prescott SSE3 SSSE3 (-march=prescott) +- 2006 core X64 SSE4.1 (only on 45nm variety) (-march=core2) +- 2008 nehalem SSE4.2 VT-x VT-d (-march=nehalem) +- 2010 westmere CLMUL AES (-march=westmere) +- 2012 sandybridge AVX TXT 
(-march=sandybridge) +- 2012 ivybridge F16C MOVBE (-march=ivybridge) +- 2013 haswell AVX2 TSX BMI2 FMA (-march=haswell) +- 2014 broadwell RDSEED ADCX PREFETCHW (-march=broadwell - works on trusty gcc4.9) +- 2015 skylake SGX ADX MPX AVX-512[xeon-only] (-march=skylake / -march=skylake-avx512 - needs gcc7) +- 2018 cannonlake AVX-512 SHA (-march=cannonlake - needs clang5) + +## Intel Low Power CPU Line + +- 2013 silvermont SSE4.1 SSE4.2 VT-x (-march=silvermont) +- 2016 goldmont SHA (-march=goldmont - needs clang5) + +## AMD CPU Line + +- 2003 k8 SSE SSE2 (-march=k8) +- 2005 k8 (Venus) SSE3 (-march=k8-sse3) +- 2008 barcelona SSE4a?! (-march=barcelona) +- 2011 bulldozer SSE4.1 SSE4.2 CLMUL AVX AES FMA4?! (-march=bdver1) +- 2011 piledriver FMA (-march=bdver2) +- 2015 excavator AVX2 BMI2 MOVBE (-march=bdver4) + +## Google Compute Engine Supported CPUs + +- 2012 sandybridge 2.6gHz -march=sandybridge +- 2012 ivybridge 2.5gHz -march=ivybridge +- 2013 haswell 2.3gHz -march=haswell +- 2014 broadwell 2.2gHz -march=broadwell +- 2015 skylake 2.0gHz -march=skylake-avx512 + +See: <https://cloud.google.com/compute/docs/cpu-platforms> diff --git a/third_party/toolchains/clang6/clang.BUILD b/third_party/toolchains/clang6/clang.BUILD new file mode 100644 index 0000000000..802d62c17c --- /dev/null +++ b/third_party/toolchains/clang6/clang.BUILD @@ -0,0 +1,162 @@ +package(default_visibility = ["//visibility:public"]) + +# Please note that the output of these tools is unencumbered. +licenses(["restricted"]) # NCSA, GPLv3 (e.g. 
gold) + +filegroup( + name = "ar", + srcs = ["llvm/bin/llvm-ar"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "as", + srcs = ["llvm/bin/llvm-as"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "cpp", + srcs = ["llvm/bin/llvm-cpp"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "dwp", + srcs = ["llvm/bin/llvm-dwp"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "gcc", + srcs = ["llvm/bin/clang"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "gcov", + srcs = ["llvm/bin/llvm-cov"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "ld", + srcs = ["llvm/bin/ld.lld"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "nm", + srcs = ["llvm/bin/llvm-nm"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "objcopy", + srcs = ["llvm/bin/llvm-objcopy"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "objdump", + srcs = ["llvm/bin/llvm-objdump"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "profdata", + srcs = ["llvm/bin/llvm-profdata"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "strip", + srcs = ["sbin/strip"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "xray", + srcs = ["llvm/bin/llvm-xray"], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "includes", + srcs = glob(["llvm/lib/clang/6.0.0/include/**"]), + output_licenses = ["unencumbered"], +) + +filegroup( + name = "libraries", + srcs = glob([ + "lib/*.*", + "lib/clang/6.0.0/lib/linux/*.*", + ]), + output_licenses = ["unencumbered"], +) + +filegroup( + name = "compiler_files", + srcs = [ + ":as", + ":gcc", + ":includes", + ], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "linker_files", + srcs = [ + ":ar", + ":ld", + ":libraries", + ], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "all_files", + srcs = [ + ":compiler_files", + ":dwp", + ":gcov", + 
":linker_files", + ":nm", + ":objcopy", + ":objdump", + ":profdata", + ":strip", + ":xray", + ], + output_licenses = ["unencumbered"], +) + +filegroup( + name = "empty", + srcs = [], # bazel crashes without this + output_licenses = ["unencumbered"], +) + +cc_toolchain_suite( + name = "clang6", + toolchains = { + "k8|clang6": ":clang6-k8", + }, +) + +cc_toolchain( + name = "clang6-k8", + all_files = ":all_files", + compiler_files = ":compiler_files", + cpu = "k8", + dwp_files = ":dwp", + dynamic_runtime_libs = [":empty"], + linker_files = ":linker_files", + objcopy_files = ":objcopy", + output_licenses = ["unencumbered"], + static_runtime_libs = [":empty"], + strip_files = ":strip", + supports_param_files = 1, +) diff --git a/third_party/toolchains/clang6/repo.bzl b/third_party/toolchains/clang6/repo.bzl new file mode 100644 index 0000000000..b81f44506f --- /dev/null +++ b/third_party/toolchains/clang6/repo.bzl @@ -0,0 +1,30 @@ +"""Repository rule for Debian 8 Jessie Clang-6.0 portable Linux builds.""" + +def _clang6_configure(ctx): + # TODO(jart): It'd probably be better to use Bazel's struct.to_proto() + # method to generate a gigantic CROSSTOOL file that allows + # Clang to support everything. + ctx.symlink( + ctx.os.environ.get('TF_LLVM_PATH', + '/usr/lib/llvm-6.0'), + 'clang6/llvm') + ctx.symlink( + ctx.os.environ.get('STRIP', '/usr/bin/strip'), + 'clang6/sbin/strip') + ctx.symlink( + ctx.os.environ.get('OBJDUMP', '/usr/bin/objdump'), + 'clang6/sbin/objdump') + ctx.symlink(ctx.attr._build, 'clang6/BUILD') + ctx.template('clang6/CROSSTOOL', ctx.attr._crosstool, { + '%package(@local_config_clang6//clang6)%': str(ctx.path('clang6')), + }) + +clang6_configure = repository_rule( + implementation = _clang6_configure, + attrs = { + '_build': attr.label( + default=str(Label('//third_party/toolchains/clang6:clang.BUILD'))), + '_crosstool': attr.label( + default=str(Label('//third_party/toolchains/clang6:CROSSTOOL.tpl'))), + }, +) |