diff options
author | Andrew Harp <andrewharp@google.com> | 2017-03-01 17:59:22 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-03-01 18:08:24 -0800 |
commit | 3e975ea978bac4d861bb09328b06f3c316212611 (patch) | |
tree | 79bac044c9723df8443495eb962c2dd98a2ed421 /third_party | |
parent | 8043a27ed77f59bb68409070f2bfa01df0e04b89 (diff) |
Merge changes from github.
Change: 148954491
Diffstat (limited to 'third_party')
-rw-r--r-- | third_party/curl.BUILD | 46 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 98 | ||||
-rw-r--r-- | third_party/mkl/BUILD | 11 | ||||
-rw-r--r-- | third_party/nccl.BUILD | 18 | ||||
-rwxr-xr-x | third_party/sycl/crosstool/computecpp.tpl | 33 |
5 files changed, 165 insertions, 41 deletions
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index dde8e6cdb7..557c2885eb 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -204,13 +204,13 @@ cc_library( "lib/wildcard.h", "lib/x509asn1.h", ] + select({ - ":darwin": [ + "@//tensorflow:darwin": [ "lib/vtls/darwinssl.c", ], - ":ios": [ + "@//tensorflow:ios": [ "lib/vtls/darwinssl.c", ], - ":windows": [ + "@//tensorflow:windows": [ "lib/asyn-thread.c", "lib/inet_ntop.c", "lib/system_win32.c", @@ -231,7 +231,7 @@ cc_library( "include/curl/typecheck-gcc.h", ], copts = select({ - ":windows": [ + "@//tensorflow:windows": [ "/I%prefix%/curl/lib", "/DHAVE_CONFIG_H", "/DCURL_DISABLE_FTP", @@ -255,10 +255,10 @@ cc_library( "-Wno-string-plus-int", ], }) + select({ - ":darwin": [ + "@//tensorflow:darwin": [ "-fno-constant-cfstrings", ], - ":windows": [ + "@//tensorflow:windows": [ # See curl.h for discussion of write size and Windows "/DCURL_MAX_WRITE_SIZE=16384", ], @@ -268,17 +268,17 @@ cc_library( }), includes = ["include"], linkopts = select({ - ":android": [ + "@//tensorflow:android": [ "-pie", ], - ":darwin": [ + "@//tensorflow:darwin": [ "-Wl,-framework", "-Wl,CoreFoundation", "-Wl,-framework", "-Wl,Security", ], - ":ios": [], - ":windows": [ + "@//tensorflow:ios": [], + "@//tensorflow:windows": [ "ws2_32.lib", ], "//conditions:default": [ @@ -289,8 +289,8 @@ cc_library( deps = [ "@zlib_archive//:zlib", ] + select({ - ":ios": [], - ":windows": [], + "@//tensorflow:ios": [], + "@//tensorflow:windows": [], "//conditions:default": [ "@boringssl//:ssl", ], @@ -386,7 +386,7 @@ cc_binary( "src/tool_xattr.h", ], copts = select({ - ":windows": [ + "@//tensorflow:windows": [ "/I%prefix%/curl/lib", "/DHAVE_CONFIG_H", "/DCURL_DISABLE_LIBCURL_OPTION", @@ -657,23 +657,3 @@ genrule( "EOF", ]), ) - -config_setting( - name = "ios", - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, -) - -config_setting( - name = "darwin", - values = {"cpu": "darwin"}, -) - -config_setting( - name = "windows", - values = {"cpu": "x64_windows_msvc"}, -) - -config_setting( - name = "android", - values = {"crosstool_top": "//external:android/crosstool"}, -) diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h index 98deb1742e..078be83e0d 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h @@ -11,6 +11,13 @@ typedef struct Packet32q8i { Packet32q8i(__m256i val) : val(val) {} } Packet32q8i; +typedef struct Packet16q16i { + __m256i val; + operator __m256i() const { return val; } + Packet16q16i(); + Packet16q16i(__m256i val) : val(val) {} +} Packet16q16i; + typedef struct Packet32q8u { __m256i val; operator __m256i() const { return val; } @@ -32,6 +39,13 @@ typedef struct Packet16q8u { Packet16q8u(__m128i val) : val(val) {} } Packet16q8u; +typedef struct Packet8q16i { + __m128i val; + operator __m128i() const { return val; } + Packet8q16i(); + Packet8q16i(__m128i val) : val(val) {} +} Packet8q16i; + typedef struct Packet8q32i { __m256i val; operator __m256i() const { return val; } @@ -92,6 +106,28 @@ struct packet_traits<QUInt8> : default_packet_traits { }; }; template <> +struct packet_traits<QInt16> : default_packet_traits { + typedef Packet16q16i type; + typedef Packet8q16i half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 16, + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0 + }; +}; +template <> struct packet_traits<QInt32> : default_packet_traits { typedef Packet8q32i type; typedef Packet4q32i half; @@ -122,6 +158,12 @@ struct unpacket_traits<Packet32q8i> { enum { size = 32, alignment=Aligned32 }; }; template <> +struct unpacket_traits<Packet16q16i> { + typedef QInt16 type; + typedef Packet8q16i half; + enum { size = 16, alignment=Aligned32 }; +}; +template <> struct unpacket_traits<Packet32q8u> { typedef QUInt8 type; typedef Packet16q8u half; @@ -146,6 +188,11 @@ EIGEN_STRONG_INLINE Packet32q8u ploadu<Packet32q8u>(const QUInt8* from) { reinterpret_cast<const __m256i*>(from)); } template <> +EIGEN_STRONG_INLINE Packet16q16i ploadu<Packet16q16i>(const QInt16* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256( + reinterpret_cast<const __m256i*>(from)); +} +template <> EIGEN_STRONG_INLINE Packet8q32i ploadu<Packet8q32i>(const QInt32* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256( reinterpret_cast<const __m256i*>(from)); @@ -163,6 +210,11 @@ EIGEN_STRONG_INLINE Packet32q8u pload<Packet32q8u>(const QUInt8* from) { reinterpret_cast<const __m256i*>(from)); } template <> +EIGEN_STRONG_INLINE Packet16q16i pload<Packet16q16i>(const QInt16* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256( + reinterpret_cast<const __m256i*>(from)); +} +template <> EIGEN_STRONG_INLINE Packet8q32i pload<Packet8q32i>(const QInt32* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256( reinterpret_cast<const __m256i*>(from)); @@ -180,6 +232,11 @@ EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet32q8u& from) { reinterpret_cast<__m256i*>(to), from.val); } template <> +EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet16q16i& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256( + reinterpret_cast<__m256i*>(to), from.val); +} +template <> EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet8q32i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256( reinterpret_cast<__m256i*>(to), from.val); @@ -192,6 +249,11 @@ EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet8q32i& from) { from.val); } template <> +EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet16q16i& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to), + from.val); +} +template <> EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet32q8u& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to), from.val); @@ -208,6 +270,10 @@ EIGEN_STRONG_INLINE QInt32 pfirst<Packet8q32i>(const Packet8q32i& a) { return _mm_cvtsi128_si32(_mm256_castsi256_si128(a)); } template <> +EIGEN_STRONG_INLINE QInt16 pfirst<Packet16q16i>(const Packet16q16i& a) { + return _mm256_extract_epi16(a.val, 0); +} +template <> EIGEN_STRONG_INLINE QUInt8 pfirst<Packet32q8u>(const Packet32q8u& a) { return static_cast<uint8_t>(_mm256_extract_epi8(a.val, 0)); } @@ -237,6 +303,10 @@ EIGEN_STRONG_INLINE Packet8q32i padd<Packet8q32i>(const Packet8q32i& a, return _mm256_add_epi32(a.val, b.val); } template <> +EIGEN_STRONG_INLINE Packet16q16i pset1<Packet16q16i>(const QInt16& from) { + return _mm256_set1_epi16(from.value); +} +template <> EIGEN_STRONG_INLINE Packet8q32i psub<Packet8q32i>(const Packet8q32i& a, const Packet8q32i& b) { return _mm256_sub_epi32(a.val, b.val); @@ -265,6 +335,17 @@ EIGEN_STRONG_INLINE Packet8q32i pmax<Packet8q32i>(const Packet8q32i& a, } template <> +EIGEN_STRONG_INLINE Packet16q16i pmin<Packet16q16i>(const Packet16q16i& a, + const Packet16q16i& b) { + return _mm256_min_epi16(a.val, b.val); +} +template <> +EIGEN_STRONG_INLINE Packet16q16i pmax<Packet16q16i>(const Packet16q16i& a, + const Packet16q16i& b) { + return _mm256_max_epi16(a.val, b.val); +} + +template <> EIGEN_STRONG_INLINE Packet32q8u pmin<Packet32q8u>(const Packet32q8u& a, const Packet32q8u& b) { return _mm256_min_epu8(a.val, b.val); @@ -305,6 +386,23 @@ EIGEN_STRONG_INLINE QInt32 predux_max<Packet8q32i>(const Packet8q32i& a) { } template <> +EIGEN_STRONG_INLINE QInt16 predux_min<Packet16q16i>(const Packet16q16i& a) { + __m256i tmp = _mm256_min_epi16(a, _mm256_permute2f128_si256(a, a, 1)); + tmp = + _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); + tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, 1)); + return std::min(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1)); +} +template <> +EIGEN_STRONG_INLINE QInt16 predux_max<Packet16q16i>(const Packet16q16i& a) { + __m256i tmp = _mm256_max_epi16(a, _mm256_permute2f128_si256(a, a, 1)); + tmp = + _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); + tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, 1)); + return std::max(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1)); +} + +template <> EIGEN_STRONG_INLINE QUInt8 predux_min<Packet32q8u>(const Packet32q8u& a) { __m256i tmp = _mm256_min_epu8(a, _mm256_permute2f128_si256(a, a, 1)); tmp = diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index ddaf29a086..7e95ebd355 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -8,12 +8,17 @@ config_setting( visibility = ["//visibility:public"], ) +load( + "//third_party/mkl:build_defs.bzl", + "if_mkl", +) + cc_library( name = "intel_binary_blob", - srcs = [ - "libiomp5.so", + srcs = if_mkl([ "libmklml_intel.so", - ], + "libiomp5.so", + ]), includes = ["."], visibility = ["//visibility:public"], ) diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD index bb460a05e0..06b9b8ff68 100644 --- a/third_party/nccl.BUILD +++ b/third_party/nccl.BUILD @@ -43,6 +43,24 @@ cc_library( "-Iexternal/nccl_archive/src", "-O3", ] + cuda_default_copts(), + linkopts = select({ + "@%ws%//tensorflow:android": [ + "-pie", + ], + "@%ws%//tensorflow:darwin": [ + "-Wl,-framework", + "-Wl,CoreFoundation", + "-Wl,-framework", + "-Wl,Security", + ], + "@%ws%//tensorflow:ios": [], + "@%ws%//tensorflow:windows": [ + "ws2_32.lib", + ], + "//conditions:default": [ + "-lrt", + ], + }), visibility = ["//visibility:public"], deps = ["@local_config_cuda//cuda:cuda_headers"], ) diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl index a5e6b9fe93..66dd9aea7b 100755 --- a/third_party/sycl/crosstool/computecpp.tpl +++ b/third_party/sycl/crosstool/computecpp.tpl @@ -26,9 +26,7 @@ def main(): if(output_file_index == 1): # we are linking - return subprocess.call([CPU_CXX_COMPILER] + compiler_flags) - - compiler_flags = compiler_flags + ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DEIGEN_USE_SYCL=1'] + return subprocess.call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) # find what we compile compiling_cpp = 0 @@ -38,6 +36,28 @@ def main(): if(compited_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx'))): compiling_cpp = 1; + compiler_flags = compiler_flags + ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DEIGEN_USE_SYCL=1', '-DTENSORFLOW_USE_SYCL', '-DEIGEN_HAS_C99_MATH'] + + if(compiling_cpp == 1): + # create a blacklist of folders that will be skipped when compiling with ComputeCpp + _skip = ["external", "llvm", ".cu.cc"] + # if compiling external project skip computecpp + if any(_folder in _skip for _folder in output_file_name): + return subprocess.call([CPU_CXX_COMPILER] + compiler_flags) + + if(compiling_cpp == 1): + # this is an optimisation that will check if compiled file has to be compiled with ComputeCpp + + _tmp_flags = [flag for flag in compiler_flags if not flag.startswith(('-o', output_file_name))] + # create preprocessed of the file + _cmd = " ".join([CPU_CXX_COMPILER] + _tmp_flags + ["-E"]) + # check if it has parallel_for< in it + _cmd += " | grep \".parallel_for\" > /dev/null" + ps = subprocess.call(_cmd, shell=True) + # if not call CXX compiler + if(ps != 0): + return subprocess.call([CPU_CXX_COMPILER] + compiler_flags) + if(compiling_cpp == 1): filename, file_extension = os.path.splitext(output_file_name) bc_out = filename + '.sycl' @@ -52,9 +72,12 @@ def main(): # dont want that in case of compiling with computecpp first host_compiler_flags = [flag for flag in compiler_flags if not flag.startswith(('-MF', '-MD',)) - if not '.d' in flag] + if not '.d' in flag + ] + + host_compiler_flags[host_compiler_flags.index('-c')] = "--include" - host_compiler_flags = ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '--include', bc_out] + host_compiler_flags + host_compiler_flags = ['-xc++', '-D_GLIBCXX_USE_CXX11_ABI=0', '-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-c', bc_out] + host_compiler_flags x = subprocess.call([CPU_CXX_COMPILER] + host_compiler_flags) return x else: |