aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party
diff options
context:
space:
mode:
authorGravatar Andrew Harp <andrewharp@google.com>2017-03-01 17:59:22 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-03-01 18:08:24 -0800
commit3e975ea978bac4d861bb09328b06f3c316212611 (patch)
tree79bac044c9723df8443495eb962c2dd98a2ed421 /third_party
parent8043a27ed77f59bb68409070f2bfa01df0e04b89 (diff)
Merge changes from github.
Change: 148954491
Diffstat (limited to 'third_party')
-rw-r--r--third_party/curl.BUILD46
-rw-r--r--third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h98
-rw-r--r--third_party/mkl/BUILD11
-rw-r--r--third_party/nccl.BUILD18
-rwxr-xr-xthird_party/sycl/crosstool/computecpp.tpl33
5 files changed, 165 insertions, 41 deletions
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index dde8e6cdb7..557c2885eb 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -204,13 +204,13 @@ cc_library(
"lib/wildcard.h",
"lib/x509asn1.h",
] + select({
- ":darwin": [
+ "@//tensorflow:darwin": [
"lib/vtls/darwinssl.c",
],
- ":ios": [
+ "@//tensorflow:ios": [
"lib/vtls/darwinssl.c",
],
- ":windows": [
+ "@//tensorflow:windows": [
"lib/asyn-thread.c",
"lib/inet_ntop.c",
"lib/system_win32.c",
@@ -231,7 +231,7 @@ cc_library(
"include/curl/typecheck-gcc.h",
],
copts = select({
- ":windows": [
+ "@//tensorflow:windows": [
"/I%prefix%/curl/lib",
"/DHAVE_CONFIG_H",
"/DCURL_DISABLE_FTP",
@@ -255,10 +255,10 @@ cc_library(
"-Wno-string-plus-int",
],
}) + select({
- ":darwin": [
+ "@//tensorflow:darwin": [
"-fno-constant-cfstrings",
],
- ":windows": [
+ "@//tensorflow:windows": [
# See curl.h for discussion of write size and Windows
"/DCURL_MAX_WRITE_SIZE=16384",
],
@@ -268,17 +268,17 @@ cc_library(
}),
includes = ["include"],
linkopts = select({
- ":android": [
+ "@//tensorflow:android": [
"-pie",
],
- ":darwin": [
+ "@//tensorflow:darwin": [
"-Wl,-framework",
"-Wl,CoreFoundation",
"-Wl,-framework",
"-Wl,Security",
],
- ":ios": [],
- ":windows": [
+ "@//tensorflow:ios": [],
+ "@//tensorflow:windows": [
"ws2_32.lib",
],
"//conditions:default": [
@@ -289,8 +289,8 @@ cc_library(
deps = [
"@zlib_archive//:zlib",
] + select({
- ":ios": [],
- ":windows": [],
+ "@//tensorflow:ios": [],
+ "@//tensorflow:windows": [],
"//conditions:default": [
"@boringssl//:ssl",
],
@@ -386,7 +386,7 @@ cc_binary(
"src/tool_xattr.h",
],
copts = select({
- ":windows": [
+ "@//tensorflow:windows": [
"/I%prefix%/curl/lib",
"/DHAVE_CONFIG_H",
"/DCURL_DISABLE_LIBCURL_OPTION",
@@ -657,23 +657,3 @@ genrule(
"EOF",
]),
)
-
-config_setting(
- name = "ios",
- values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
-)
-
-config_setting(
- name = "darwin",
- values = {"cpu": "darwin"},
-)
-
-config_setting(
- name = "windows",
- values = {"cpu": "x64_windows_msvc"},
-)
-
-config_setting(
- name = "android",
- values = {"crosstool_top": "//external:android/crosstool"},
-)
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index 98deb1742e..078be83e0d 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -11,6 +11,13 @@ typedef struct Packet32q8i {
Packet32q8i(__m256i val) : val(val) {}
} Packet32q8i;
+// Packet type wrapping a single __m256i register. The traits specializations
+// in this header associate it with QInt16 (size = 16).
+typedef struct Packet16q16i {
+ __m256i val;
+ // Implicit conversion back to the raw register, so a packet can be passed
+ // directly to AVX intrinsics.
+ operator __m256i() const { return val; }
+ // Default constructor is declared only; no definition appears in this hunk.
+ Packet16q16i();
+ // Non-explicit on purpose: lets an intrinsic's __m256i result convert to a
+ // packet at a return statement.
+ Packet16q16i(__m256i val) : val(val) {}
+} Packet16q16i;
+
typedef struct Packet32q8u {
__m256i val;
operator __m256i() const { return val; }
@@ -32,6 +39,13 @@ typedef struct Packet16q8u {
Packet16q8u(__m128i val) : val(val) {}
} Packet16q8u;
+// Packet type wrapping a single __m128i register. It is named as the `half`
+// packet of Packet16q16i by the traits specializations in this header.
+typedef struct Packet8q16i {
+ __m128i val;
+ // Implicit conversion back to the raw register.
+ operator __m128i() const { return val; }
+ // Default constructor is declared only; no definition appears in this hunk.
+ Packet8q16i();
+ // Non-explicit wrap of a raw __m128i value.
+ Packet8q16i(__m128i val) : val(val) {}
+} Packet8q16i;
+
typedef struct Packet8q32i {
__m256i val;
operator __m256i() const { return val; }
@@ -92,6 +106,28 @@ struct packet_traits<QUInt8> : default_packet_traits {
};
};
template <>
+struct packet_traits<QInt16> : default_packet_traits {
+ // Full packet and half packet types selected for the QInt16 scalar.
+ typedef Packet16q16i type;
+ typedef Packet8q16i half;
+ // Vectorization is enabled with 16 scalars per packet.
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 16,
+ };
+ // Capability flags: only HasMin and HasMax are set; every other listed
+ // operation is explicitly marked as unavailable.
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 0,
+ HasSetLinear = 0
+ };
+};
+template <>
struct packet_traits<QInt32> : default_packet_traits {
typedef Packet8q32i type;
typedef Packet4q32i half;
@@ -122,6 +158,12 @@ struct unpacket_traits<Packet32q8i> {
enum { size = 32, alignment=Aligned32 };
};
template <>
+struct unpacket_traits<Packet16q16i> {
+ // Reverse mapping from the packet type to its scalar and half packet; the
+ // values mirror packet_traits<QInt16> (16 scalars per packet).
+ typedef QInt16 type;
+ typedef Packet8q16i half;
+ enum { size = 16, alignment=Aligned32 };
+};
+template <>
struct unpacket_traits<Packet32q8u> {
typedef QUInt8 type;
typedef Packet16q8u half;
@@ -146,6 +188,11 @@ EIGEN_STRONG_INLINE Packet32q8u ploadu<Packet32q8u>(const QUInt8* from) {
reinterpret_cast<const __m256i*>(from));
}
template <>
+EIGEN_STRONG_INLINE Packet16q16i ploadu<Packet16q16i>(const QInt16* from) {
+  // Reads 256 bits starting at `from` with the unaligned-load intrinsic.
+  const __m256i* const src = reinterpret_cast<const __m256i*>(from);
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(src);
+}
+template <>
EIGEN_STRONG_INLINE Packet8q32i ploadu<Packet8q32i>(const QInt32* from) {
EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
reinterpret_cast<const __m256i*>(from));
@@ -163,6 +210,11 @@ EIGEN_STRONG_INLINE Packet32q8u pload<Packet32q8u>(const QUInt8* from) {
reinterpret_cast<const __m256i*>(from));
}
template <>
+EIGEN_STRONG_INLINE Packet16q16i pload<Packet16q16i>(const QInt16* from) {
+  // Reads 256 bits starting at `from` with the aligned-load intrinsic.
+  const __m256i* const src = reinterpret_cast<const __m256i*>(from);
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(src);
+}
+template <>
EIGEN_STRONG_INLINE Packet8q32i pload<Packet8q32i>(const QInt32* from) {
EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(
reinterpret_cast<const __m256i*>(from));
@@ -180,6 +232,11 @@ EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet32q8u& from) {
reinterpret_cast<__m256i*>(to), from.val);
}
template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet16q16i& from) {
+  // Writes the packet's 256 bits to `to` with the unaligned-store intrinsic.
+  __m256i* const dst = reinterpret_cast<__m256i*>(to);
+  EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(dst, from.val);
+}
+template <>
EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet8q32i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
reinterpret_cast<__m256i*>(to), from.val);
@@ -192,6 +249,11 @@ EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet8q32i& from) {
from.val);
}
template <>
+EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet16q16i& from) {
+  // Writes the packet's 256 bits to `to` with the aligned-store intrinsic.
+  __m256i* const dst = reinterpret_cast<__m256i*>(to);
+  EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(dst, from.val);
+}
+template <>
EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet32q8u& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
from.val);
@@ -208,6 +270,10 @@ EIGEN_STRONG_INLINE QInt32 pfirst<Packet8q32i>(const Packet8q32i& a) {
return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
}
template <>
+EIGEN_STRONG_INLINE QInt16 pfirst<Packet16q16i>(const Packet16q16i& a) {
+  // Extract 16-bit lane 0; the int result converts implicitly to QInt16.
+  const int lane0 = _mm256_extract_epi16(a.val, 0);
+  return lane0;
+}
+template <>
EIGEN_STRONG_INLINE QUInt8 pfirst<Packet32q8u>(const Packet32q8u& a) {
return static_cast<uint8_t>(_mm256_extract_epi8(a.val, 0));
}
@@ -237,6 +303,10 @@ EIGEN_STRONG_INLINE Packet8q32i padd<Packet8q32i>(const Packet8q32i& a,
return _mm256_add_epi32(a.val, b.val);
}
template <>
+EIGEN_STRONG_INLINE Packet16q16i pset1<Packet16q16i>(const QInt16& from) {
+  // Broadcast the scalar's raw value into every 16-bit lane.
+  const __m256i broadcast = _mm256_set1_epi16(from.value);
+  return broadcast;
+}
+template <>
EIGEN_STRONG_INLINE Packet8q32i psub<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_sub_epi32(a.val, b.val);
@@ -265,6 +335,17 @@ EIGEN_STRONG_INLINE Packet8q32i pmax<Packet8q32i>(const Packet8q32i& a,
}
template <>
+EIGEN_STRONG_INLINE Packet16q16i pmin<Packet16q16i>(const Packet16q16i& a,
+                                                    const Packet16q16i& b) {
+  // Lane-wise signed 16-bit minimum of the two packets.
+  const __m256i lanewise_min = _mm256_min_epi16(a.val, b.val);
+  return lanewise_min;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q16i pmax<Packet16q16i>(const Packet16q16i& a,
+                                                    const Packet16q16i& b) {
+  // Lane-wise signed 16-bit maximum of the two packets.
+  const __m256i lanewise_max = _mm256_max_epi16(a.val, b.val);
+  return lanewise_max;
+}
+
+template <>
EIGEN_STRONG_INLINE Packet32q8u pmin<Packet32q8u>(const Packet32q8u& a,
const Packet32q8u& b) {
return _mm256_min_epu8(a.val, b.val);
@@ -305,6 +386,23 @@ EIGEN_STRONG_INLINE QInt32 predux_max<Packet8q32i>(const Packet8q32i& a) {
}
template <>
+EIGEN_STRONG_INLINE QInt16 predux_min<Packet16q16i>(const Packet16q16i& a) {
+  // Horizontal signed minimum over all 16-bit lanes. Each step folds one part
+  // of the register onto another with a lane-wise min: the two 128-bit halves,
+  // then the 64-bit halves within each, then 32-bit element 1 onto element 0.
+  // Afterwards the two remaining candidates sit in 16-bit lanes 0 and 1.
+  __m256i tmp = _mm256_min_epi16(a, _mm256_permute2f128_si256(a, a, 1));
+  tmp =
+      _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
+  tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
+  // _mm256_extract_epi16 returns an int that may carry the lane zero-extended,
+  // which would make a negative lane compare as a large positive value.
+  // Narrow to int16_t first so the final comparison is a signed 16-bit one.
+  const int16_t lane0 = static_cast<int16_t>(_mm256_extract_epi16(tmp, 0));
+  const int16_t lane1 = static_cast<int16_t>(_mm256_extract_epi16(tmp, 1));
+  return std::min(lane0, lane1);
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_max<Packet16q16i>(const Packet16q16i& a) {
+  // Horizontal signed maximum over all 16-bit lanes. Each step folds one part
+  // of the register onto another with a lane-wise max: the two 128-bit halves,
+  // then the 64-bit halves within each, then 32-bit element 1 onto element 0.
+  // Afterwards the two remaining candidates sit in 16-bit lanes 0 and 1.
+  __m256i tmp = _mm256_max_epi16(a, _mm256_permute2f128_si256(a, a, 1));
+  tmp =
+      _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
+  tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
+  // _mm256_extract_epi16 returns an int that may carry the lane zero-extended,
+  // which would make a negative lane win over a positive one. Narrow to
+  // int16_t first so the final comparison is a signed 16-bit one.
+  const int16_t lane0 = static_cast<int16_t>(_mm256_extract_epi16(tmp, 0));
+  const int16_t lane1 = static_cast<int16_t>(_mm256_extract_epi16(tmp, 1));
+  return std::max(lane0, lane1);
+}
+
+template <>
EIGEN_STRONG_INLINE QUInt8 predux_min<Packet32q8u>(const Packet32q8u& a) {
__m256i tmp = _mm256_min_epu8(a, _mm256_permute2f128_si256(a, a, 1));
tmp =
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index ddaf29a086..7e95ebd355 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -8,12 +8,17 @@ config_setting(
visibility = ["//visibility:public"],
)
+load(
+ "//third_party/mkl:build_defs.bzl",
+ "if_mkl",
+)
+
cc_library(
name = "intel_binary_blob",
- srcs = [
- "libiomp5.so",
+ srcs = if_mkl([
"libmklml_intel.so",
- ],
+ "libiomp5.so",
+ ]),
includes = ["."],
visibility = ["//visibility:public"],
)
diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD
index bb460a05e0..06b9b8ff68 100644
--- a/third_party/nccl.BUILD
+++ b/third_party/nccl.BUILD
@@ -43,6 +43,24 @@ cc_library(
"-Iexternal/nccl_archive/src",
"-O3",
] + cuda_default_copts(),
+ linkopts = select({
+ "@%ws%//tensorflow:android": [
+ "-pie",
+ ],
+ "@%ws%//tensorflow:darwin": [
+ "-Wl,-framework",
+ "-Wl,CoreFoundation",
+ "-Wl,-framework",
+ "-Wl,Security",
+ ],
+ "@%ws%//tensorflow:ios": [],
+ "@%ws%//tensorflow:windows": [
+ "ws2_32.lib",
+ ],
+ "//conditions:default": [
+ "-lrt",
+ ],
+ }),
visibility = ["//visibility:public"],
deps = ["@local_config_cuda//cuda:cuda_headers"],
)
diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl
index a5e6b9fe93..66dd9aea7b 100755
--- a/third_party/sycl/crosstool/computecpp.tpl
+++ b/third_party/sycl/crosstool/computecpp.tpl
@@ -26,9 +26,7 @@ def main():
if(output_file_index == 1):
# we are linking
- return subprocess.call([CPU_CXX_COMPILER] + compiler_flags)
-
- compiler_flags = compiler_flags + ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DEIGEN_USE_SYCL=1']
+ return subprocess.call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
# find what we compile
compiling_cpp = 0
@@ -38,6 +36,28 @@ def main():
if(compited_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx'))):
compiling_cpp = 1;
+ compiler_flags = compiler_flags + ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DEIGEN_USE_SYCL=1', '-DTENSORFLOW_USE_SYCL', '-DEIGEN_HAS_C99_MATH']
+
+ if(compiling_cpp == 1):
+ # create a blacklist of folders that will be skipped when compiling with ComputeCpp
+ _skip = ["external", "llvm", ".cu.cc"]
+ # if compiling external project skip computecpp
+ if any(_folder in _skip for _folder in output_file_name):
+ return subprocess.call([CPU_CXX_COMPILER] + compiler_flags)
+
+ if(compiling_cpp == 1):
+ # Optimisation: preprocess the source first and only continue to ComputeCpp
+ # when the preprocessed output matches ".parallel_for"; otherwise the host
+ # compiler handles the file on its own.
+
+ _tmp_flags = [flag for flag in compiler_flags if not flag.startswith(('-o', output_file_name))]
+ # Build the preprocessing command: host compiler, the flags with every entry
+ # starting with '-o' or with the output file name removed, and -E.
+ _cmd = " ".join([CPU_CXX_COMPILER] + _tmp_flags + ["-E"])
+ # Pipe the preprocessed output into grep; the pattern is ".parallel_for",
+ # where the leading "." is a regex wildcard matching any one character.
+ _cmd += " | grep \".parallel_for\" > /dev/null"
+ # NOTE(review): the flags are joined without quoting and run through the
+ # shell, so a flag containing spaces or shell metacharacters would be
+ # re-split or interpreted by the shell - confirm such flags cannot occur.
+ ps = subprocess.call(_cmd, shell=True)
+ # A non-zero status means the pattern was not found (or the pipeline
+ # failed): compile with the host compiler only.
+ if(ps != 0):
+ return subprocess.call([CPU_CXX_COMPILER] + compiler_flags)
+
if(compiling_cpp == 1):
filename, file_extension = os.path.splitext(output_file_name)
bc_out = filename + '.sycl'
@@ -52,9 +72,12 @@ def main():
# dont want that in case of compiling with computecpp first
host_compiler_flags = [flag for flag in compiler_flags
if not flag.startswith(('-MF', '-MD',))
- if not '.d' in flag]
+ if not '.d' in flag
+ ]
+
+ host_compiler_flags[host_compiler_flags.index('-c')] = "--include"
- host_compiler_flags = ['-D_GLIBCXX_USE_CXX11_ABI=0', '-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '--include', bc_out] + host_compiler_flags
+ host_compiler_flags = ['-xc++', '-D_GLIBCXX_USE_CXX11_ABI=0', '-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-c', bc_out] + host_compiler_flags
x = subprocess.call([CPU_CXX_COMPILER] + host_compiler_flags)
return x
else: