5 files changed, 726 insertions, 684 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
new file mode 100644
index 0000000000..b754bbf009
--- /dev/null
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
@@ -0,0 +1,545 @@
+#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+
+#include "PacketMathAVX2.h"
+
+namespace Eigen {
+namespace internal {
+
+typedef struct Packet64q8i {
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet64q8i();
+  Packet64q8i(__m512i val) : val(val) {}
+} Packet64q8i;
+
+typedef struct Packet32q16i {
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet32q16i();
+  Packet32q16i(__m512i val) : val(val) {}
+} Packet32q16i;
+
+typedef struct Packet64q8u {
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet64q8u();
+  Packet64q8u(__m512i val) : val(val) {}
+} Packet64q8u;
+
+typedef struct Packet16q32i {
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet16q32i();
+  Packet16q32i(__m512i val) : val(val) {}
+} Packet16q32i;
+
+template <>
+struct packet_traits<QInt8> : default_packet_traits {
+  typedef Packet64q8i type;
+  typedef Packet32q8i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 64,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QUInt8> : default_packet_traits {
+  typedef Packet64q8u type;
+  typedef Packet32q8u half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 64,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QInt16> : default_packet_traits {
+  typedef Packet32q16i type;
+  typedef Packet16q16i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 32,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QInt32> : default_packet_traits {
+  typedef Packet16q32i type;
+  typedef Packet8q32i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+  };
+  enum {
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasNegate = 1,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+
+template <>
+struct unpacket_traits<Packet64q8i> {
+  typedef QInt8 type;
+  typedef Packet32q8i half;
+  enum { size = 64 };
+};
+template <>
+struct unpacket_traits<Packet32q16i> {
+  typedef QInt16 type;
+  typedef Packet16q16i half;
+  enum { size = 32 };
+};
+template <>
+struct unpacket_traits<Packet64q8u> {
+  typedef QUInt8 type;
+  typedef Packet32q8u half;
+  enum { size = 64 };
+};
+template <>
+struct unpacket_traits<Packet16q32i> {
+  typedef QInt32 type;
+  typedef Packet8q32i half;
+  enum { size = 16 };
+};
+
+// Unaligned load
+template <>
+EIGEN_STRONG_INLINE Packet64q8i ploadu<Packet64q8i>(const QInt8* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i ploadu<Packet32q16i>(const QInt16* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u ploadu<Packet64q8u>(const QUInt8* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i ploadu<Packet16q32i>(const QInt32* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+// Aligned load
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pload<Packet64q8i>(const QInt8* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pload<Packet32q16i>(const QInt16* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pload<Packet64q8u>(const QUInt8* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pload<Packet16q32i>(const QInt32* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+// Unaligned store
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet64q8i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet32q16i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet64q8u& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet16q32i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+
+// Aligned store
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet16q32i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet64q8u& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet64q8i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet32q16i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+
+// Extract first element.
+template <>
+EIGEN_STRONG_INLINE QInt32 pfirst<Packet16q32i>(const Packet16q32i& a) {
+  return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a, 0));
+}
+template <>
+EIGEN_STRONG_INLINE QUInt8 pfirst<Packet64q8u>(const Packet64q8u& a) {
+  return static_cast<uint8_t>(
+           _mm_extract_epi8(_mm512_extracti32x4_epi32(a.val, 0), 0));
+}
+template <>
+EIGEN_STRONG_INLINE QInt8 pfirst<Packet64q8i>(const Packet64q8i& a) {
+  return _mm_extract_epi8(_mm512_extracti32x4_epi32(a.val, 0), 0);
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 pfirst<Packet32q16i>(const Packet32q16i& a) {
+  return _mm_extract_epi16(_mm512_extracti32x4_epi32(a.val, 0), 0);
+}
+
+// Initialize to constant value.
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pset1<Packet64q8i>(const QInt8& from) {
+  return _mm512_set1_epi8(from.value);
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pset1<Packet32q16i>(const QInt16& from) {
+  return _mm512_set1_epi16(from.value);
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pset1<Packet64q8u>(const QUInt8& from) {
+  return _mm512_set1_epi8(static_cast<uint8_t>(from.value));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pset1<Packet16q32i>(const QInt32& from) {
+  return _mm512_set1_epi32(from.value);
+}
+
+// Basic arithmetic packet ops for QInt32.
+template <>
+EIGEN_STRONG_INLINE Packet16q32i padd<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_add_epi32(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i psub<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_sub_epi32(a.val, b.val);
+}
+// Note: mullo keeps only the low 32 bits of each 64-bit product.
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmul<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_mullo_epi32(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pnegate<Packet16q32i>(const Packet16q32i& a) {
+  return _mm512_sub_epi32(_mm512_setzero_si512(), a.val);
+}
+
+// Min and max.
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmin<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_min_epi32(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmax<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_max_epi32(a.val, b.val);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pmin<Packet64q8u>(const Packet64q8u& a,
+                                                  const Packet64q8u& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epu8(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epu8(ap0, bp0);
+  __m256i r1 = _mm256_min_epu8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pmax<Packet64q8u>(const Packet64q8u& a,
+                                                  const Packet64q8u& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epu8(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epu8(ap0, bp0);
+  __m256i r1 = _mm256_max_epu8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pmin<Packet64q8i>(const Packet64q8i& a,
+                                                  const Packet64q8i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epi8(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epi8(ap0, bp0);
+  __m256i r1 = _mm256_min_epi8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pmin<Packet32q16i>(const Packet32q16i& a,
+                                                    const Packet32q16i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epi16(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epi16(ap0, bp0);
+  __m256i r1 = _mm256_min_epi16(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pmax<Packet64q8i>(const Packet64q8i& a,
+                                                  const Packet64q8i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epi8(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epi8(ap0, bp0);
+  __m256i r1 = _mm256_max_epi8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pmax<Packet32q16i>(const Packet32q16i& a,
+                                                    const Packet32q16i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epi16(a.val, b.val);
+#else
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epi16(ap0, bp0);
+  __m256i r1 = _mm256_max_epi16(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+
+// Reductions.
+// Horizontal min over the 16 signed 32-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt32 predux_min<Packet16q32i>(const Packet16q32i& a) {
+  const Packet4i q0 = _mm512_extracti32x4_epi32(a.val, 0);
+  const Packet4i q1 = _mm512_extracti32x4_epi32(a.val, 1);
+  const Packet4i q2 = _mm512_extracti32x4_epi32(a.val, 2);
+  const Packet4i q3 = _mm512_extracti32x4_epi32(a.val, 3);
+  const Packet4i m4 =
+      _mm_min_epi32(_mm_min_epi32(q0, q1), _mm_min_epi32(q2, q3));
+  const Packet4i m2 =
+      _mm_min_epi32(m4, _mm_shuffle_epi32(m4, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(
+      _mm_min_epi32(m2, _mm_shuffle_epi32(m2, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+// Horizontal max over the 16 signed 32-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt32 predux_max<Packet16q32i>(const Packet16q32i& a) {
+  const Packet4i q0 = _mm512_extracti32x4_epi32(a.val, 0);
+  const Packet4i q1 = _mm512_extracti32x4_epi32(a.val, 1);
+  const Packet4i q2 = _mm512_extracti32x4_epi32(a.val, 2);
+  const Packet4i q3 = _mm512_extracti32x4_epi32(a.val, 3);
+  const Packet4i m4 =
+      _mm_max_epi32(_mm_max_epi32(q0, q1), _mm_max_epi32(q2, q3));
+  const Packet4i m2 =
+      _mm_max_epi32(m4, _mm_shuffle_epi32(m4, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(
+      _mm_max_epi32(m2, _mm_shuffle_epi32(m2, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+// Horizontal min over the 32 signed 16-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_min<Packet32q16i>(const Packet32q16i& a) {
+  const Packet4i q0 = _mm512_extracti32x4_epi32(a.val, 0);
+  const Packet4i q1 = _mm512_extracti32x4_epi32(a.val, 1);
+  const Packet4i q2 = _mm512_extracti32x4_epi32(a.val, 2);
+  const Packet4i q3 = _mm512_extracti32x4_epi32(a.val, 3);
+  const Packet4i m8 =
+      _mm_min_epi16(_mm_min_epi16(q0, q1), _mm_min_epi16(q2, q3));
+  const Packet4i m4 =
+      _mm_min_epi16(m8, _mm_shuffle_epi32(m8, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last two 16-bit candidates.
+  const std::uint32_t w = pfirst(
+      _mm_min_epi16(m4, _mm_shuffle_epi32(m4, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min(
+      {static_cast<std::int16_t>(w >> 16), static_cast<std::int16_t>(w)});
+}
+// Horizontal max over the 32 signed 16-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_max<Packet32q16i>(const Packet32q16i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epi16(_mm_max_epi16(lane0, lane1), _mm_max_epi16(lane2, lane3));
+  res = _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last two 16-bit candidates.
+  std::uint32_t w = pfirst(
+      _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  // Pick the larger of the surviving candidates (std::max, not std::min).
+  return std::max(
+      {static_cast<std::int16_t>(w >> 16), static_cast<std::int16_t>(w)});
+}
+// Horizontal min over the 64 unsigned 8-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QUInt8 predux_min<Packet64q8u>(const Packet64q8u& a) {
+  const Packet4i q0 = _mm512_extracti32x4_epi32(a.val, 0);
+  const Packet4i q1 = _mm512_extracti32x4_epi32(a.val, 1);
+  const Packet4i q2 = _mm512_extracti32x4_epi32(a.val, 2);
+  const Packet4i q3 = _mm512_extracti32x4_epi32(a.val, 3);
+  const Packet4i m16 =
+      _mm_min_epu8(_mm_min_epu8(q0, q1), _mm_min_epu8(q2, q3));
+  const Packet4i m8 =
+      _mm_min_epu8(m16, _mm_shuffle_epi32(m16, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last four 8-bit candidates.
+  const std::uint32_t w = pfirst(
+      _mm_min_epu8(m8, _mm_shuffle_epi32(m8, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min({static_cast<std::uint8_t>(w >> 24),
+                   static_cast<std::uint8_t>(w >> 16),
+                   static_cast<std::uint8_t>(w >> 8),
+                   static_cast<std::uint8_t>(w)});
+}
+// Horizontal max over the 64 unsigned 8-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QUInt8 predux_max<Packet64q8u>(const Packet64q8u& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epu8(_mm_max_epu8(lane0, lane1), _mm_max_epu8(lane2, lane3));
+  res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last four 8-bit candidates.
+  std::uint32_t w = pfirst(
+      _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  // Pick the largest of the surviving candidates (std::max, not std::min).
+  return std::max({static_cast<std::uint8_t>(w >> 24),
+                   static_cast<std::uint8_t>(w >> 16),
+                   static_cast<std::uint8_t>(w >> 8),
+                   static_cast<std::uint8_t>(w)});
+}
+// Horizontal min over the 64 signed 8-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt8 predux_min<Packet64q8i>(const Packet64q8i& a) {
+  const Packet4i q0 = _mm512_extracti32x4_epi32(a.val, 0);
+  const Packet4i q1 = _mm512_extracti32x4_epi32(a.val, 1);
+  const Packet4i q2 = _mm512_extracti32x4_epi32(a.val, 2);
+  const Packet4i q3 = _mm512_extracti32x4_epi32(a.val, 3);
+  const Packet4i m16 =
+      _mm_min_epi8(_mm_min_epi8(q0, q1), _mm_min_epi8(q2, q3));
+  const Packet4i m8 =
+      _mm_min_epi8(m16, _mm_shuffle_epi32(m16, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last four 8-bit candidates.
+  const std::uint32_t w = pfirst(
+      _mm_min_epi8(m8, _mm_shuffle_epi32(m8, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min({static_cast<std::int8_t>(w >> 24),
+                   static_cast<std::int8_t>(w >> 16),
+                   static_cast<std::int8_t>(w >> 8),
+                   static_cast<std::int8_t>(w)});
+}
+// Horizontal max over the 64 signed 8-bit elements of the packet.
+template <>
+EIGEN_STRONG_INLINE QInt8 predux_max<Packet64q8i>(const Packet64q8i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epi8(_mm_max_epi8(lane0, lane1), _mm_max_epi8(lane2, lane3));
+  res = _mm_max_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  // The low 32 bits now carry the last four 8-bit candidates.
+  std::uint32_t w = pfirst(
+      _mm_max_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  // Pick the largest of the surviving candidates (std::max, not std::min).
+  return std::max({static_cast<std::int8_t>(w >> 24),
+                   static_cast<std::int8_t>(w >> 16),
+                   static_cast<std::int8_t>(w >> 8),
+                   static_cast<std::int8_t>(w)});
+}
+
+}  // end namespace internal
+}  // end namespace Eigen
+
+#endif  // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
new file mode 100644
index 0000000000..cd7120ec00
--- /dev/null
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
@@ -0,0 +1,180 @@
+#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+
+namespace Eigen {
+namespace internal {
+
+typedef __m512 Packet16f;
+typedef __m512i Packet16i;
+
+template <>
+struct type_casting_traits<QInt32, float> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pcast<Packet16q32i>(const Packet16q32i& a) {
+  return _mm512_cvtepi32_ps(a.val);
+}
+
+template <>
+struct type_casting_traits<float, QInt32> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pcast<Packet16f>(const Packet16f& a) {
+  return _mm512_cvtps_epi32(a);
+}
+
+template <>
+struct type_casting_traits<float, QInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet32q16i
+pcast<Packet16f>(const Packet16f& a, const Packet16f& b) {
+  Packet16i a_int = _mm512_cvtps_epi32(a);
+  Packet16i b_int = _mm512_cvtps_epi32(b);
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_packs_epi32(a_int, b_int);
+#else
+  Packet8i ab_int16_low =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_castsi512_si256(a_int),
+          _mm512_castsi512_si256(b_int)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i ab_int16_high =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_extracti32x8_epi32(a_int, 1),
+          _mm512_extracti32x8_epi32(b_int, 1)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  return _mm512_inserti32x8(
+           _mm512_castsi256_si512(ab_int16_low),
+           ab_int16_high, 1);
+#endif
+}
+
+template <>
+struct type_casting_traits<float, QInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i
+pcast<Packet16f>(const Packet16f& a,
+                 const Packet16f& b,
+                 const Packet16f& c,
+                 const Packet16f& d) {
+  Packet16i a_int = _mm512_cvtps_epi32(a);
+  Packet16i b_int = _mm512_cvtps_epi32(b);
+  Packet16i c_int = _mm512_cvtps_epi32(c);
+  Packet16i d_int = _mm512_cvtps_epi32(d);
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_packs_epi16(
+           _mm512_packs_epi32(a_int, b_int),
+           _mm512_packs_epi32(c_int, d_int));
+#else
+  Packet8i ab_int16_low =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_castsi512_si256(a_int),
+          _mm512_castsi512_si256(b_int)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i cd_int16_low =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_castsi512_si256(c_int),
+          _mm512_castsi512_si256(d_int)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i ab_int16_high =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_extracti32x8_epi32(a_int, 1),
+          _mm512_extracti32x8_epi32(b_int, 1)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i cd_int16_high =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi32(
+          _mm512_extracti32x8_epi32(c_int, 1),
+          _mm512_extracti32x8_epi32(d_int, 1)),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i abcd_int8_low =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi16(ab_int16_low, cd_int16_low),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i abcd_int8_high =
+      _mm256_permute4x64_epi64(
+        _mm256_packs_epi16(ab_int16_high, cd_int16_high),
+        _MM_SHUFFLE(0, 2, 1, 3));
+  return _mm512_inserti32x8(
+           _mm512_castsi256_si512(abcd_int8_low),
+           abcd_int8_high, 1);
+#endif
+}
+
+template <>
+struct type_casting_traits<QInt32, QInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
+};
+
+template <>
+struct type_casting_traits<QInt32, QInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i
+pcast<Packet16q32i, Packet64q8i>(const Packet16q32i& a,
+                                 const Packet16q32i& b,
+                                 const Packet16q32i& c,
+                                 const Packet16q32i& d) {
+  __m512i converted = _mm512_packs_epi16(_mm512_packs_epi32(a.val, b.val),
+                                         _mm512_packs_epi32(c.val, d.val));
+  return converted;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet32q16i
+pcast<Packet16q32i, Packet32q16i>(const Packet16q32i& a,
+                                  const Packet16q32i& b) {
+  __m512i converted = _mm512_packs_epi32(a.val, b.val);
+  return converted;
+}
+
+template <>
+struct type_casting_traits<QInt32, QUInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8u
+pcast<Packet16q32i, Packet64q8u>(const Packet16q32i& a, const Packet16q32i& b,
+                                 const Packet16q32i& c, const Packet16q32i& d) {
+  const __m512i converted = _mm512_packus_epi16(
+      _mm512_packus_epi32(a.val, b.val), _mm512_packus_epi32(c.val, d.val));
+  return converted;
+}
+
+template <>
+struct type_casting_traits<QInt32, QUInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+#if 0
+template <>
+EIGEN_STRONG_INLINE Packet32q16u
+pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
+                                  const Packet16q32i& b) {
+  const __m512i converted = _mm512_packus_epi32(a.val, b.val);
+  return converted;
+}
+#endif
+
+}  // end namespace internal
+}  // end namespace Eigen
+
+#endif  // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
diff --git a/third_party/jpeg.BUILD b/third_party/jpeg.BUILD
deleted file mode 100644
index cbc1e86e51..0000000000
--- a/third_party/jpeg.BUILD
+++ /dev/null
@@ -1,416 +0,0 @@
-# Description:
-#   libjpeg-turbo is a drop in replacement for jpeglib optimized with SIMD.
-
-licenses(["notice"])  # custom notice-style license, see LICENSE.md
-
-exports_files(["LICENSE.md"])
-
-libjpegturbo_nocopts = "-[W]error"
-
-libjpegturbo_copts = select({
-    ":android": [
-        "-O2",
-        "-fPIE",
-        "-w",
-    ],
-    ":windows": [
-        "/Ox",
-        "/w14711",  # function 'function' selected for inline expansion
-        "/w14710",  # 'function' : function not inlined
-    ],
-    "//conditions:default": [
-        "-O3",
-        "-w",
-    ],
-}) + select({
-    ":armeabi-v7a": [
-        "-D__ARM_NEON__",
-        "-march=armv7-a",
-        "-mfloat-abi=softfp",
-        "-fprefetch-loop-arrays",
-    ],
-    "//conditions:default": [],
-})
-
-cc_library(
-    name = "jpeg",
-    srcs = [
-        "jaricom.c",
-        "jcapimin.c",
-        "jcapistd.c",
-        "jcarith.c",
-        "jccoefct.c",
-        "jccolor.c",
-        "jcdctmgr.c",
-        "jchuff.c",
-        "jchuff.h",
-        "jcinit.c",
-        "jcmainct.c",
-        "jcmarker.c",
-        "jcmaster.c",
-        "jcomapi.c",
-        "jconfig.h",
-        "jconfigint.h",
-        "jcparam.c",
-        "jcphuff.c",
-        "jcprepct.c",
-        "jcsample.c",
-        "jctrans.c",
-        "jdapimin.c",
-        "jdapistd.c",
-        "jdarith.c",
-        "jdatadst.c",
-        "jdatasrc.c",
-        "jdcoefct.c",
-        "jdcoefct.h",
-        "jdcolor.c",
-        "jdct.h",
-        "jddctmgr.c",
-        "jdhuff.c",
-        "jdhuff.h",
-        "jdinput.c",
-        "jdmainct.c",
-        "jdmainct.h",
-        "jdmarker.c",
-        "jdmaster.c",
-        "jdmaster.h",
-        "jdmerge.c",
-        "jdphuff.c",
-        "jdpostct.c",
-        "jdsample.c",
-        "jdsample.h",
-        "jdtrans.c",
-        "jerror.c",
-        "jfdctflt.c",
-        "jfdctfst.c",
-        "jfdctint.c",
-        "jidctflt.c",
-        "jidctfst.c",
-        "jidctint.c",
-        "jidctred.c",
-        "jinclude.h",
-        "jmemmgr.c",
-        "jmemnobs.c",
-        "jmemsys.h",
-        "jpeg_nbits_table.h",
-        "jpegcomp.h",
-        "jquant1.c",
-        "jquant2.c",
-        "jutils.c",
-        "jversion.h",
-    ],
-    hdrs = [
-        "jccolext.c",  # should have been named .inc
-        "jdcol565.c",  # should have been named .inc
-        "jdcolext.c",  # should have been named .inc
-        "jdmrg565.c",  # should have been named .inc
-        "jdmrgext.c",  # should have been named .inc
-        "jerror.h",
-        "jmorecfg.h",
-        "jpegint.h",
-        "jpeglib.h",
-        "jstdhuff.c",  # should have been named .inc
-    ],
-    copts = libjpegturbo_copts,
-    nocopts = libjpegturbo_nocopts,
-    visibility = ["//visibility:public"],
-    deps = select({
-        ":k8": [":simd_x86_64"],
-        ":armeabi-v7a": [":simd_armv7a"],
-        ":arm64-v8a": [":simd_armv8a"],
-        "//conditions:default": [":simd_none"],
-    }),
-)
-
-cc_library(
-    name = "simd_x86_64",
-    srcs = [
-        "jchuff.h",
-        "jconfig.h",
-        "jdct.h",
-        "jerror.h",
-        "jinclude.h",
-        "jmorecfg.h",
-        "jpegint.h",
-        "jpeglib.h",
-        "jsimd.h",
-        "jsimddct.h",
-        "simd/jccolor-sse2-64.o",
-        "simd/jcgray-sse2-64.o",
-        "simd/jchuff-sse2-64.o",
-        "simd/jcsample-sse2-64.o",
-        "simd/jdcolor-sse2-64.o",
-        "simd/jdmerge-sse2-64.o",
-        "simd/jdsample-sse2-64.o",
-        "simd/jfdctflt-sse-64.o",
-        "simd/jfdctfst-sse2-64.o",
-        "simd/jfdctint-sse2-64.o",
-        "simd/jidctflt-sse2-64.o",
-        "simd/jidctfst-sse2-64.o",
-        "simd/jidctint-sse2-64.o",
-        "simd/jidctred-sse2-64.o",
-        "simd/jquantf-sse2-64.o",
-        "simd/jquanti-sse2-64.o",
-        "simd/jsimd.h",
-        "simd/jsimd_x86_64.c",
-    ],
-    copts = libjpegturbo_copts,
-    linkstatic = 1,
-    nocopts = libjpegturbo_nocopts,
-)
-
-genrule(
-    name = "simd_x86_64_assemblage23",
-    srcs = [
-        "simd/jccolext-sse2-64.asm",
-        "simd/jccolor-sse2-64.asm",
-        "simd/jcgray-sse2-64.asm",
-        "simd/jcgryext-sse2-64.asm",
-        "simd/jchuff-sse2-64.asm",
-        "simd/jcolsamp.inc",
-        "simd/jcsample-sse2-64.asm",
-        "simd/jdcolext-sse2-64.asm",
-        "simd/jdcolor-sse2-64.asm",
-        "simd/jdct.inc",
-        "simd/jdmerge-sse2-64.asm",
-        "simd/jdmrgext-sse2-64.asm",
-        "simd/jdsample-sse2-64.asm",
-        "simd/jfdctflt-sse-64.asm",
-        "simd/jfdctfst-sse2-64.asm",
-        "simd/jfdctint-sse2-64.asm",
-        "simd/jidctflt-sse2-64.asm",
-        "simd/jidctfst-sse2-64.asm",
-        "simd/jidctint-sse2-64.asm",
-        "simd/jidctred-sse2-64.asm",
-        "simd/jpeg_nbits_table.inc",
-        "simd/jquantf-sse2-64.asm",
-        "simd/jquanti-sse2-64.asm",
-        "simd/jsimdcfg.inc",
-        "simd/jsimdext.inc",
-    ],
-    outs = [
-        "simd/jccolor-sse2-64.o",
-        "simd/jcgray-sse2-64.o",
-        "simd/jchuff-sse2-64.o",
-        "simd/jcsample-sse2-64.o",
-        "simd/jdcolor-sse2-64.o",
-        "simd/jdmerge-sse2-64.o",
-        "simd/jdsample-sse2-64.o",
-        "simd/jfdctflt-sse-64.o",
-        "simd/jfdctfst-sse2-64.o",
-        "simd/jfdctint-sse2-64.o",
-        "simd/jidctflt-sse2-64.o",
-        "simd/jidctfst-sse2-64.o",
-        "simd/jidctint-sse2-64.o",
-        "simd/jidctred-sse2-64.o",
-        "simd/jquantf-sse2-64.o",
-        "simd/jquanti-sse2-64.o",
-    ],
-    cmd = "for out in $(OUTS); do\n" +
-          "  $(location @nasm//:nasm) -f elf64" +
-          "    -DELF -DPIC -DRGBX_FILLER_0XFF -D__x86_64__ -DARCH_X86_64" +
-          "    -I $$(dirname $(location simd/jdct.inc))/" +
-          "    -I $$(dirname $(location simd/jsimdcfg.inc))/" +
-          "    -o $$out" +
-          "    $$(dirname $(location simd/jdct.inc))/$$(basename $${out%.o}.asm)\n" +
-          "done",
-    tools = ["@nasm//:nasm"],
-)
-
-cc_library(
-    name = "simd_armv7a",
-    srcs = [
-        "jchuff.h",
-        "jconfig.h",
-        "jdct.h",
-        "jinclude.h",
-        "jmorecfg.h",
-        "jpeglib.h",
-        "jsimd.h",
-        "jsimddct.h",
-        "simd/jsimd.h",
-        "simd/jsimd_arm.c",
-        "simd/jsimd_arm_neon.S",
-    ],
-    copts = libjpegturbo_copts,
-    nocopts = libjpegturbo_nocopts,
-)
-
-cc_library(
-    name = "simd_armv8a",
-    srcs = [
-        "jchuff.h",
-        "jconfig.h",
-        "jdct.h",
-        "jinclude.h",
-        "jmorecfg.h",
-        "jpeglib.h",
-        "jsimd.h",
-        "jsimddct.h",
-        "simd/jsimd.h",
-        "simd/jsimd_arm64.c",
-        "simd/jsimd_arm64_neon.S",
-    ],
-    copts = libjpegturbo_copts,
-    nocopts = libjpegturbo_nocopts,
-)
-
-cc_library(
-    name = "simd_none",
-    srcs = [
-        "jchuff.h",
-        "jconfig.h",
-        "jdct.h",
-        "jerror.h",
-        "jinclude.h",
-        "jmorecfg.h",
-        "jpegint.h",
-        "jpeglib.h",
-        "jsimd.h",
-        "jsimd_none.c",
-        "jsimddct.h",
-    ],
-    copts = libjpegturbo_copts,
-    nocopts = libjpegturbo_nocopts,
-)
-
-genrule(
-    name = "configure",
-    outs = ["jconfig.h"],
-    cmd = "cat <<'EOF' >$@\n" +
-          "#define JPEG_LIB_VERSION 62\n" +
-          "#define LIBJPEG_TURBO_VERSION 1.5.1\n" +
-          "#define LIBJPEG_TURBO_VERSION_NUMBER 1005001\n" +
-          "#define C_ARITH_CODING_SUPPORTED 1\n" +
-          "#define D_ARITH_CODING_SUPPORTED 1\n" +
-          "#define BITS_IN_JSAMPLE 8\n" +
-          "#define HAVE_LOCALE_H 1\n" +
-          "#define HAVE_STDDEF_H 1\n" +
-          "#define HAVE_STDLIB_H 1\n" +
-          "#define HAVE_UNSIGNED_CHAR 1\n" +
-          "#define HAVE_UNSIGNED_SHORT 1\n" +
-          "#define MEM_SRCDST_SUPPORTED 1\n" +
-          "#define NEED_SYS_TYPES_H 1\n" +
-          select({
-              ":k8": "#define WITH_SIMD 1\n",
-              ":armeabi-v7a": "#define WITH_SIMD 1\n",
-              ":arm64-v8a": "#define WITH_SIMD 1\n",
-              "//conditions:default": "",
-          }) +
-          "EOF",
-)
-
-genrule(
-    name = "configure_internal",
-    outs = ["jconfigint.h"],
-    cmd = "cat <<'EOF' >$@\n" +
-          "#define BUILD \"20161115\"\n" +
-          "#ifdef _MSC_VER  /* Windows */\n" +
-          "#define INLINE __inline\n" +
-          "#else\n" +
-          "#define INLINE inline __attribute__((always_inline))\n" +
-          "#endif\n" +
-          "#define PACKAGE_NAME \"libjpeg-turbo\"\n" +
-          "#define VERSION \"1.5.1\"\n" +
-          "#if (__WORDSIZE==64 && !defined(__native_client__)) || defined(_WIN64)\n" +
-          "#define SIZEOF_SIZE_T 8\n" +
-          "#else\n" +
-          "#define SIZEOF_SIZE_T 4\n" +
-          "#endif\n" +
-          "EOF",
-)
-
-# jiminy cricket the way this file is generated is completely outrageous
-genrule(
-    name = "configure_simd",
-    outs = ["simd/jsimdcfg.inc"],
-    cmd = "cat <<'EOF' >$@\n" +
-          "%define DCTSIZE 8\n" +
-          "%define DCTSIZE2 64\n" +
-          "%define RGB_RED 0\n" +
-          "%define RGB_GREEN 1\n" +
-          "%define RGB_BLUE 2\n" +
-          "%define RGB_PIXELSIZE 3\n" +
-          "%define EXT_RGB_RED 0\n" +
-          "%define EXT_RGB_GREEN 1\n" +
-          "%define EXT_RGB_BLUE 2\n" +
-          "%define EXT_RGB_PIXELSIZE 3\n" +
-          "%define EXT_RGBX_RED 0\n" +
-          "%define EXT_RGBX_GREEN 1\n" +
-          "%define EXT_RGBX_BLUE 2\n" +
-          "%define EXT_RGBX_PIXELSIZE 4\n" +
-          "%define EXT_BGR_RED 2\n" +
-          "%define EXT_BGR_GREEN 1\n" +
-          "%define EXT_BGR_BLUE 0\n" +
-          "%define EXT_BGR_PIXELSIZE 3\n" +
-          "%define EXT_BGRX_RED 2\n" +
-          "%define EXT_BGRX_GREEN 1\n" +
-          "%define EXT_BGRX_BLUE 0\n" +
-          "%define EXT_BGRX_PIXELSIZE 4\n" +
-          "%define EXT_XBGR_RED 3\n" +
-          "%define EXT_XBGR_GREEN 2\n" +
-          "%define EXT_XBGR_BLUE 1\n" +
-          "%define EXT_XBGR_PIXELSIZE 4\n" +
-          "%define EXT_XRGB_RED 1\n" +
-          "%define EXT_XRGB_GREEN 2\n" +
-          "%define EXT_XRGB_BLUE 3\n" +
-          "%define EXT_XRGB_PIXELSIZE 4\n" +
-          "%define RGBX_FILLER_0XFF 1\n" +
-          "%define JSAMPLE byte ; unsigned char\n" +
-          "%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)\n" +
-          "%define CENTERJSAMPLE 128\n" +
-          "%define JCOEF word ; short\n" +
-          "%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)\n" +
-          "%define JDIMENSION dword ; unsigned int\n" +
-          "%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)\n" +
-          "%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)\n" +
-          "%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)\n" +
-          "%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)\n" +
-          "%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)\n" +
-          "%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)\n" +
-          "%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)\n" +
-          "%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)\n" +
-          "%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)\n" +
-          "%define DCTELEM word ; short\n" +
-          "%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)\n" +
-          "%define float FP32 ; float\n" +
-          "%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)\n" +
-          "%define ISLOW_MULT_TYPE word ; must be short\n" +
-          "%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)\n" +
-          "%define IFAST_MULT_TYPE word ; must be short\n" +
-          "%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)\n" +
-          "%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors\n" +
-          "%define FLOAT_MULT_TYPE FP32 ; must be float\n" +
-          "%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)\n" +
-          "%define JSIMD_NONE 0x00\n" +
-          "%define JSIMD_MMX 0x01\n" +
-          "%define JSIMD_3DNOW 0x02\n" +
-          "%define JSIMD_SSE 0x04\n" +
-          "%define JSIMD_SSE2 0x08\n" +
-          "EOF",
-)
-
-config_setting(
-    name = "k8",
-    values = {"cpu": "k8"},
-)
-
-config_setting(
-    name = "android",
-    values = {"crosstool_top": "//external:android/crosstool"},
-)
-
-config_setting(
-    name = "armeabi-v7a",
-    values = {"android_cpu": "armeabi-v7a"},
-)
-
-config_setting(
-    name = "arm64-v8a",
-    values = {"android_cpu": "arm64-v8a"},
-)
-
-config_setting(
-    name = "windows",
-    values = {"cpu": "x64_windows_msvc"},
-)
diff --git a/third_party/sycl/crosstool/BUILD b/third_party/sycl/crosstool/BUILD
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/third_party/sycl/crosstool/BUILD
@@ -0,0 +1 @@
+
diff --git a/third_party/sycl/sycl/LICENSE.text.tpl b/third_party/sycl/sycl/LICENSE.text.tpl
deleted file mode 100644
index 0c2955c4d7..0000000000
--- a/third_party/sycl/sycl/LICENSE.text.tpl
+++ /dev/null
@@ -1,268 +0,0 @@
-
----------------------------------------------------------------------
-
-SOFTWARE LICENSE AGREEMENT
-
----------------------------------------------------------------------
----------------------------------------------------------------------
-
-By downloading, installing, copying, or otherwise using the
-ComputeCpp Community Edition software, including any associated
-components, media, printed materials, and electronic documentation
-("Software"), the user agrees to the following terms and conditions
-of this Software License Agreement ("Agreement"). Please read the
-terms of this Agreement carefully before beginning your download, as
-pressing the "I AGREE" button at the end of this Agreement will
-confirm your assent. If you do not agree to these terms, then
-Codeplay Software Limited is unwilling to license the Software to
-you; so please press the "CANCEL" button to cancel your download.
-
- 1. License. Codeplay Software Ltd., a company incorporated in
-    England and Wales with registered number 04567874 and having its
-    registered office at Regent House, 316 Beulah Hill, London,
-    United Kingdom, SE19 3HF ("Codeplay") hereby grants the user,
-    free of charge, a non-exclusive worldwide license to use and
-    replicate (but not modify) the Software for any use, whether
-    commercial or non-commercial, in accordance with this Agreement.
-    Codeplay reserves all rights to the Software that are not
-    expressly granted by this Agreement.
- 2. Redistribution. The user may copy and redistribute unmodified
-    copies of only those components of the Software which are
-    specified below ("Redistributable Components"), in object code
-    form, as part of the user’s software applications or libraries
-    ("Applications"). The user acknowledges and agrees that it has no
-    right to modify the Redistributable Components in any way. Any
-    use of the Redistributable Components within the user’s
-    Applications will continue to be subject to the terms and
-    conditions of this Agreement, and the user must also distribute a
-    copy of this Agreement and reproduce and include all notices of
-    copyrights or other proprietary rights in the Software. The
-    user’s redistribution of the Redistributable Components will not
-    entitle it to any payment from Codeplay. The user may not
-    transfer any of its rights or obligations under this Agreement.
-
-+-------------------------------------------+
-|Redistributable Component|File Name        |
-|-------------------------+-----------------|
-|Runtime (for Linux)      |libComputeCpp.so |
-|-------------------------+-----------------|
-|Runtime (for Windows)    |libComputeCpp.dll|
-+-------------------------------------------+
-
- 3. Restrictions. The user shall not:
-
-     a. circumvent or bypass any technological protection measures in
-        or relating to the Software;
-     b. use the Software to perform any unauthorized transfer of
-        information or for any illegal purpose;
-     c. de-compile, decrypt, disassemble, hack, emulate, exploit or
-        reverse-engineer the Software (other than to the limited
-        extent permitted by law);
-     d. copy or redistribute any components of the Software that are
-        not listed in the table of Redistributable Components;
-     e. publish, rent, lease, sell, export, import, or lend the
-        Software;
-     f. represent in any way that it is selling the Software itself
-        or any license to use the Software, nor refer to Codeplay or
-        ComputeCpp within its marketing materials, without the
-        express prior written permission of Codeplay.
- 4. Support. Codeplay does not provide any guarantees of support for
-    the Software to the user. Codeplay will use reasonable endeavours
-    to respond to users' support requests, for the most recent
-    release only, via the community support website at https://
-    computecpp.codeplay.com.
- 5. Intellectual Property. The Software is owned by Codeplay or its
-    licensors, and is protected by the copyright laws of the United
-    Kingdom and other countries and international treaty provisions.
-    Codeplay (and/or its licensors, as the case may be) retains all
-    copyrights, trade secrets and other proprietary rights in the
-    Software, including the rights to make and license the use of all
-    copies. To the extent that any patents owned by Codeplay or its
-    licensors relate to any component of the Software, the licence
-    granted to the user in accordance with this Agreement allows for
-    the lawful use of such patents but only for the purposes of this
-    Agreement and not further or otherwise. Therefore, the user may
-    make no copies of the Software, or the written materials that
-    accompany the Software, or reproduce it in any way, except as set
-    forth above.
- 6. Terms. This Agreement is effective until terminated. Codeplay or
-    the user may terminate it immediately at any time. Any violation
-    of the terms of this Agreement by the user will result in
-    immediate termination by Codeplay. Upon termination, the user
-    must return or destroy the Software and accompanying materials
-    and notify Codeplay of its actions by email to info@codeplay.com.
- 7. NO WARRANTIES. Codeplay expressly disclaims any warranty for the
-    Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
-    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-    WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
-    AND NON-INFRINGEMENT. IN NO EVENT SHALL CODEPLAY BE LIABLE FOR
-    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-    CONTRACT, DELICT OR TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-    SOFTWARE. In particular, Codeplay provides no guarantees of
-    application performance on the target hardware.
- 8. General. The invalidity of any portion or provision of this
-    Agreement shall not affect any other portions or provisions. This
-    Agreement shall be governed by the laws of Scotland. This
-    Agreement is the complete and exclusive agreement between the
-    user and Codeplay regarding the Software, and it supersedes any
-    prior agreement, oral or written, and any other communication
-    between the user and Codeplay relating to the subject matter of
-    the Agreement. Any amendment or modification of this Agreement
-    must be in writing and signed by both parties. If the user does
-    not agree to the terms of this Agreement, the user must not
-    install or use the Software.
- 9. Third Party Licenses. The following licenses are for third-party
-    components included in the software.
-
-     a. License for Clang/LLVM compiler technology components:
-
-==============================================================================
-
-LLVM Release License
-
-==============================================================================
-
-University of Illinois/NCSA
-
-Open Source License
-
-Copyright (c) 2007-2014 University of Illinois at Urbana-Champaign.
-
-All rights reserved.
-
-Developed by:
-
- LLVM Team
-
- University of Illinois at Urbana-Champaign
-
- http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal with
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimers.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimers in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the names of the LLVM Team, University of Illinois at
- Urbana-Champaign, nor the names of its contributors may be used to
- endorse or promote products derived from this Software without specific
- prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-SOFTWARE.
-
-==============================================================================
-
- b. License for OpenBSD regex components:
-
-$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
-Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
-This software is not subject to any license of the American Telephone
-and Telegraph Company or of the Regents of the University of California.
-Permission is granted to anyone to use this software for any purpose on
-any computer system, and to alter it and redistribute it, subject
-to the following restrictions:
-
-1. The author is not responsible for the consequences of use of this
- software, no matter how awful, even if they arise from flaws in it.
-
-2. The origin of this software must not be misrepresented, either by
- explicit claim or by omission. Since few users ever read sources,
- credits must appear in the documentation.
-
-3. Altered versions must be plainly marked as such, and must not be
- misrepresented as being the original software. Since few users
- ever read sources, credits must appear in the documentation.
-
-4. This notice may not be removed or altered.
-
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-
-/*-
- * Copyright (c) 1994
- *      The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *                  @(#)COPYRIGHT8.1 (Berkeley) 3/16/94
- */
-
- c. License for MD5 components:
-
-/*
- * This code is derived from (original license follows):
- *
- * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
- * MD5 Message-Digest Algorithm (RFC 1321).
- *
- * Homepage:
- *  http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
- *
- * Author:
- * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
- *
- * This software was written by Alexander Peslyak in 2001. No copyright is
- * claimed, and the software is hereby placed in the public domain.
- * In case this attempt to disclaim copyright and place the software in the
- * public domain is deemed null and void, then the software is
- * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
- * general public under the following terms:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted.
- *
- * There's ABSOLUTELY NO WARRANTY, express or implied.
- *
- * (This is a heavily cut-down "BSD license".)
- *
- * This differs from Colin Plumb's older public domain implementation in that
- * no exactly 32-bit integer data type is required (any 32-bit or wider
- * unsigned integer data type will do), there's no compile-time endianness
- * configuration, and the function prototypes match OpenSSL's. No code from
- * Colin Plumb's implementation has been reused; this comment merely compares
- * the properties of the two independent implementations.
- *
- * The primary goals of this implementation are portability and ease of use.
- * It is meant to be fast, but not as fast as possible. Some known
- * optimizations are not included to reduce source code size and avoid
- * compile-time configuration.
- */
-
-