diff options
Diffstat (limited to 'third_party')
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h | 545 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h | 180 | ||||
-rw-r--r-- | third_party/jpeg.BUILD | 416 | ||||
-rw-r--r-- | third_party/sycl/crosstool/BUILD | 1 | ||||
-rw-r--r-- | third_party/sycl/sycl/LICENSE.text.tpl | 268 |
5 files changed, 726 insertions, 684 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
new file mode 100644
index 0000000000..b754bbf009
--- /dev/null
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
@@ -0,0 +1,545 @@
+#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+
+#include "PacketMathAVX2.h"  // presumably supplies the 256-bit `half` packet types used below -- confirm
+
+namespace Eigen {
+namespace internal {
+
+typedef struct Packet64q8i {  // 512-bit packet holding 64 QInt8 values (see packet_traits<QInt8>)
+  __m512i val;
+  operator __m512i() const { return val; }  // implicit unwrap so the packet can be passed to intrinsics
+  Packet64q8i();  // declared only; no definition appears in this header
+  Packet64q8i(__m512i val) : val(val) {}  // implicit wrap so intrinsic results can be returned directly
+} Packet64q8i;
+
+typedef struct Packet32q16i {  // 512-bit packet holding 32 QInt16 values
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet32q16i();
+  Packet32q16i(__m512i val) : val(val) {}
+} Packet32q16i;
+
+typedef struct Packet64q8u {  // 512-bit packet holding 64 QUInt8 values
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet64q8u();
+  Packet64q8u(__m512i val) : val(val) {}
+} Packet64q8u;
+
+typedef struct Packet16q32i {  // 512-bit packet holding 16 QInt32 values
+  __m512i val;
+  operator __m512i() const { return val; }
+  Packet16q32i();
+  Packet16q32i(__m512i val) : val(val) {}
+} Packet16q32i;
+
+template <>
+struct packet_traits<QInt8> : default_packet_traits {  // only min/max are advertised as vectorized
+  typedef Packet64q8i type;
+  typedef Packet32q8i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 64,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QUInt8> : default_packet_traits {  // only min/max are advertised as vectorized
+  typedef Packet64q8u type;
+  typedef Packet32q8u half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 64,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QInt16> : default_packet_traits {  // only min/max are advertised as vectorized
+  typedef Packet32q16i type;
+  typedef Packet16q16i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 32,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
+struct packet_traits<QInt32> : default_packet_traits {  // the one type that also advertises add/sub/mul/negate
+  typedef Packet16q32i type;
+  typedef Packet8q32i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+  };
+  enum {
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasNegate = 1,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+
+template <>
+struct unpacket_traits<Packet64q8i> {  // reverse mapping: packet type -> scalar type, half packet, lane count
+  typedef QInt8 type;
+  typedef Packet32q8i half;
+  enum { size = 64 };
+};
+template <>
+struct unpacket_traits<Packet32q16i> {
+  typedef QInt16 type;
+  typedef Packet16q16i half;
+  enum { size = 32 };
+};
+template <>
+struct unpacket_traits<Packet64q8u> {
+  typedef QUInt8 type;
+  typedef Packet32q8u half;
+  enum { size = 64 };
+};
+template <>
+struct unpacket_traits<Packet16q32i> {
+  typedef QInt32 type;
+  typedef Packet8q32i half;
+  enum { size = 16 };
+};
+
+// Unaligned load: reads 64 bytes starting at `from`, which needs no particular alignment.
+template <>
+EIGEN_STRONG_INLINE Packet64q8i ploadu<Packet64q8i>(const QInt8* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i ploadu<Packet32q16i>(const QInt16* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u ploadu<Packet64q8u>(const QUInt8* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i ploadu<Packet16q32i>(const QInt32* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+// Aligned load: reads 64 bytes from `from`, which must be 64-byte aligned (_mm512_load_si512 requirement).
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pload<Packet64q8i>(const QInt8* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pload<Packet32q16i>(const QInt16* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pload<Packet64q8u>(const QUInt8* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pload<Packet16q32i>(const QInt32* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+// Unaligned store: writes the 64 bytes of `from` to `to`, which needs no particular alignment.
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet64q8i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet32q16i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet64q8u& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet16q32i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from.val);
+}
+
+// Aligned store: writes the 64 bytes of `from` to `to`, which must be 64-byte aligned.
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet16q32i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet64q8u& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet64q8i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet32q16i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_si512(reinterpret_cast<__m512i*>(to),
+                                               from.val);
+}
+
+// Extract first element: take the low 128 bits of the packet, then read element 0 of that.
+template <>
+EIGEN_STRONG_INLINE QInt32 pfirst<Packet16q32i>(const Packet16q32i& a) {
+  return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a, 0));  // `a` converts implicitly to __m512i
+}
+template <>
+EIGEN_STRONG_INLINE QUInt8 pfirst<Packet64q8u>(const Packet64q8u& a) {
+  return static_cast<uint8_t>(
+      _mm_extract_epi8(_mm512_extracti32x4_epi32(a.val, 0), 0));
+}
+template <>
+EIGEN_STRONG_INLINE QInt8 pfirst<Packet64q8i>(const Packet64q8i& a) {
+  return _mm_extract_epi8(_mm512_extracti32x4_epi32(a.val, 0), 0);
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 pfirst<Packet32q16i>(const Packet32q16i& a) {
+  return _mm_extract_epi16(_mm512_extracti32x4_epi32(a.val, 0), 0);
+}
+
+// Initialize to constant value: broadcast the scalar's raw `value` to every lane.
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pset1<Packet64q8i>(const QInt8& from) {
+  return _mm512_set1_epi8(from.value);
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pset1<Packet32q16i>(const QInt16& from) {
+  return _mm512_set1_epi16(from.value);
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pset1<Packet64q8u>(const QUInt8& from) {
+  return _mm512_set1_epi8(static_cast<uint8_t>(from.value));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pset1<Packet16q32i>(const QInt32& from) {
+  return _mm512_set1_epi32(from.value);
+}
+
+// Basic arithmetic packet ops for QInt32.
+template <>
+EIGEN_STRONG_INLINE Packet16q32i padd<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_add_epi32(a.val, b.val);  // lane-wise 32-bit add, wraps on overflow
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i psub<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_sub_epi32(a.val, b.val);  // lane-wise 32-bit subtract, wraps on overflow
+}
+// Note: mullo keeps only the low 32 bits of each lane's 64-bit product (no saturation).
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmul<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_mullo_epi32(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pnegate<Packet16q32i>(const Packet16q32i& a) {
+  return _mm512_sub_epi32(_mm512_setzero_si512(), a.val);  // computed as 0 - a
+}
+
+// Min and max. The 8/16-bit variants use one 512-bit op under AVX512BW; otherwise they run the AVX2 op on each 256-bit half and rejoin.
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmin<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_min_epi32(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pmax<Packet16q32i>(const Packet16q32i& a,
+                                                    const Packet16q32i& b) {
+  return _mm512_max_epi32(a.val, b.val);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pmin<Packet64q8u>(const Packet64q8u& a,
+                                                  const Packet64q8u& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epu8(a.val, b.val);
+#else  // no AVX512BW: split, min each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);  // NOTE(review): extracti32x8/inserti32x8 are AVX512DQ intrinsics -- confirm the build enables DQ
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epu8(ap0, bp0);
+  __m256i r1 = _mm256_min_epu8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8u pmax<Packet64q8u>(const Packet64q8u& a,
+                                                  const Packet64q8u& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epu8(a.val, b.val);
+#else  // no AVX512BW: split, max each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epu8(ap0, bp0);
+  __m256i r1 = _mm256_max_epu8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pmin<Packet64q8i>(const Packet64q8i& a,
+                                                  const Packet64q8i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epi8(a.val, b.val);
+#else  // no AVX512BW: split, min each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epi8(ap0, bp0);
+  __m256i r1 = _mm256_min_epi8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pmin<Packet32q16i>(const Packet32q16i& a,
+                                                    const Packet32q16i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_min_epi16(a.val, b.val);
+#else  // no AVX512BW: split, min each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_min_epi16(ap0, bp0);
+  __m256i r1 = _mm256_min_epi16(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet64q8i pmax<Packet64q8i>(const Packet64q8i& a,
+                                                  const Packet64q8i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epi8(a.val, b.val);
+#else  // no AVX512BW: split, max each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epi8(ap0, bp0);
+  __m256i r1 = _mm256_max_epi8(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet32q16i pmax<Packet32q16i>(const Packet32q16i& a,
+                                                    const Packet32q16i& b) {
+#ifdef EIGEN_VECTORIZE_AVX512BW
+  return _mm512_max_epi16(a.val, b.val);
+#else  // no AVX512BW: split, max each 256-bit half, rejoin
+  __m256i ap0 = _mm512_extracti32x8_epi32(a.val, 0);
+  __m256i ap1 = _mm512_extracti32x8_epi32(a.val, 1);
+  __m256i bp0 = _mm512_extracti32x8_epi32(b.val, 0);
+  __m256i bp1 = _mm512_extracti32x8_epi32(b.val, 1);
+  __m256i r0 = _mm256_max_epi16(ap0, bp0);
+  __m256i r1 = _mm256_max_epi16(ap1, bp1);
+  return _mm512_inserti32x8(_mm512_castsi256_si512(r0), r1, 1);
+#endif
+}
+
+// Reductions: combine the four 128-bit lanes, fold the 32-bit elements into element 0, then (8/16-bit types) fold that element's sub-word fields in scalar code.
+template <>
+EIGEN_STRONG_INLINE QInt32 predux_min<Packet16q32i>(const Packet16q32i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_min_epi32(_mm_min_epi32(lane0, lane1), _mm_min_epi32(lane2, lane3));
+  res = _mm_min_epi32(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));  // elements 0,1 <- min with elements 2,3
+  return pfirst(
+      _mm_min_epi32(
+          res,
+          _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));  // element 0 <- min with element 1
+}
+template <>
+EIGEN_STRONG_INLINE QInt32 predux_max<Packet16q32i>(const Packet16q32i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epi32(_mm_max_epi32(lane0, lane1), _mm_max_epi32(lane2, lane3));
+  res = _mm_max_epi32(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));  // elements 0,1 <- max with elements 2,3
+  return pfirst(
+      _mm_max_epi32(
+          res,
+          _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));  // element 0 <- max with element 1
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_min<Packet32q16i>(const Packet32q16i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_min_epi16(_mm_min_epi16(lane0, lane1), _mm_min_epi16(lane2, lane3));
+  res = _mm_min_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: two 16-bit partial minima
+      pfirst(
+          _mm_min_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min({
+      static_cast<std::int16_t>(w >> 16),
+      static_cast<std::int16_t>(w)
+  });
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_max<Packet32q16i>(const Packet32q16i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epi16(_mm_max_epi16(lane0, lane1), _mm_max_epi16(lane2, lane3));
+  res = _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: two 16-bit partial maxima
+      pfirst(
+          _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::max({  // final fold of a max reduction must keep the larger half
+      static_cast<std::int16_t>(w >> 16),
+      static_cast<std::int16_t>(w)
+  });
+}
+template <>
+EIGEN_STRONG_INLINE QUInt8 predux_min<Packet64q8u>(const Packet64q8u& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_min_epu8(_mm_min_epu8(lane0, lane1), _mm_min_epu8(lane2, lane3));
+  res = _mm_min_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: four 8-bit partial minima
+      pfirst(
+          _mm_min_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min({
+      static_cast<std::uint8_t>(w >> 24),
+      static_cast<std::uint8_t>(w >> 16),
+      static_cast<std::uint8_t>(w >> 8),
+      static_cast<std::uint8_t>(w)
+  });
+}
+template <>
+EIGEN_STRONG_INLINE QUInt8 predux_max<Packet64q8u>(const Packet64q8u& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epu8(_mm_max_epu8(lane0, lane1), _mm_max_epu8(lane2, lane3));
+  res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: four 8-bit partial maxima
+      pfirst(
+          _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::max({  // final fold of a max reduction must keep the largest byte
+      static_cast<std::uint8_t>(w >> 24),
+      static_cast<std::uint8_t>(w >> 16),
+      static_cast<std::uint8_t>(w >> 8),
+      static_cast<std::uint8_t>(w)
+  });
+}
+template <>
+EIGEN_STRONG_INLINE QInt8 predux_min<Packet64q8i>(const Packet64q8i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_min_epi8(_mm_min_epi8(lane0, lane1), _mm_min_epi8(lane2, lane3));
+  res = _mm_min_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: four 8-bit partial minima
+      pfirst(
+          _mm_min_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::min({
+      static_cast<std::int8_t>(w >> 24),
+      static_cast<std::int8_t>(w >> 16),
+      static_cast<std::int8_t>(w >> 8),
+      static_cast<std::int8_t>(w)
+  });
+}
+template <>
+EIGEN_STRONG_INLINE QInt8 predux_max<Packet64q8i>(const Packet64q8i& a) {
+  Packet4i lane0 = _mm512_extracti32x4_epi32(a.val, 0);
+  Packet4i lane1 = _mm512_extracti32x4_epi32(a.val, 1);
+  Packet4i lane2 = _mm512_extracti32x4_epi32(a.val, 2);
+  Packet4i lane3 = _mm512_extracti32x4_epi32(a.val, 3);
+  Packet4i res =
+      _mm_max_epi8(_mm_max_epi8(lane0, lane1), _mm_max_epi8(lane2, lane3));
+  res = _mm_max_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  std::uint32_t w =  // low 32 bits: four 8-bit partial maxima
+      pfirst(
+          _mm_max_epi8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
+  return std::max({  // final fold of a max reduction must keep the largest byte
+      static_cast<std::int8_t>(w >> 24),
+      static_cast<std::int8_t>(w >> 16),
+      static_cast<std::int8_t>(w >> 8),
+      static_cast<std::int8_t>(w)
+  });
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
new file mode 100644
index 0000000000..cd7120ec00
--- /dev/null
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
@@ -0,0 +1,180 @@
+#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+
+namespace Eigen {
+namespace internal {
+
+typedef __m512 Packet16f;  // 16 x float
+typedef __m512i Packet16i;  // 16 x 32-bit integer
+
+template <>
+struct type_casting_traits<QInt32, float> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pcast<Packet16q32i>(const Packet16q32i& a) {
+  return _mm512_cvtepi32_ps(a.val);  // int32 -> float, lane by lane
+}
+
+template <>
+struct type_casting_traits<float, QInt32> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet16q32i pcast<Packet16f>(const Packet16f& a) {
+  return _mm512_cvtps_epi32(a);  // rounds per the current MXCSR mode (this is not a truncating cast)
+}
+
+template <>
+struct type_casting_traits<float, QInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };  // two float packets in, one QInt16 packet out
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet32q16i
+pcast<Packet16f>(const Packet16f& a, const Packet16f& b) {
+  Packet16i a_int = _mm512_cvtps_epi32(a);
+  Packet16i b_int = _mm512_cvtps_epi32(b);
+#ifdef EIGEN_VECTORIZE_AVX512BW  // NOTE(review): this branch packs per 128-bit lane with no permute, the #else branch permutes 64-bit quads -- the two do not give the same element order; confirm which is intended
+  return _mm512_packs_epi32(a_int, b_int);  // signed saturation to int16
+#else
+  Packet8i ab_int16_low =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_castsi512_si256(a_int),
+        _mm512_castsi512_si256(b_int)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i ab_int16_high =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_extracti32x8_epi32(a_int, 1),
+        _mm512_extracti32x8_epi32(b_int, 1)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  return _mm512_inserti32x8(
+    _mm512_castsi256_si512(ab_int16_low),
+    ab_int16_high, 1);
+#endif
+}
+
+template <>
+struct type_casting_traits<float, QInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };  // four float packets in, one QInt8 packet out
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i
+pcast<Packet16f>(const Packet16f& a,
+                 const Packet16f& b,
+                 const Packet16f& c,
+                 const Packet16f& d) {
+  Packet16i a_int = _mm512_cvtps_epi32(a);
+  Packet16i b_int = _mm512_cvtps_epi32(b);
+  Packet16i c_int = _mm512_cvtps_epi32(c);
+  Packet16i d_int = _mm512_cvtps_epi32(d);
+#ifdef EIGEN_VECTORIZE_AVX512BW  // NOTE(review): same element-order question as the two-packet overload -- confirm
+  return _mm512_packs_epi16(
+      _mm512_packs_epi32(a_int, b_int),
+      _mm512_packs_epi32(c_int, d_int));
+#else
+  Packet8i ab_int16_low =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_castsi512_si256(a_int),
+        _mm512_castsi512_si256(b_int)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i cd_int16_low =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_castsi512_si256(c_int),
+        _mm512_castsi512_si256(d_int)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i ab_int16_high =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_extracti32x8_epi32(a_int, 1),
+        _mm512_extracti32x8_epi32(b_int, 1)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i cd_int16_high =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi32(
+        _mm512_extracti32x8_epi32(c_int, 1),
+        _mm512_extracti32x8_epi32(d_int, 1)),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i abcd_int8_low =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi16(ab_int16_low, cd_int16_low),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  Packet8i abcd_int8_high =
+    _mm256_permute4x64_epi64(
+      _mm256_packs_epi16(ab_int16_high, cd_int16_high),
+      _MM_SHUFFLE(0, 2, 1, 3));
+  return _mm512_inserti32x8(
+    _mm512_castsi256_si512(abcd_int8_low),
+    abcd_int8_high, 1);
+#endif
+}
+
+template <>
+struct type_casting_traits<QInt32, QInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
+};
+
+template <>
+struct type_casting_traits<QInt32, QInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8i
+pcast<Packet16q32i, Packet64q8i>(const Packet16q32i& a,
+                                 const Packet16q32i& b,
+                                 const Packet16q32i& c,
+                                 const Packet16q32i& d) {
+  __m512i converted = _mm512_packs_epi16(_mm512_packs_epi32(a.val, b.val),  // NOTE(review): AVX512BW packs with no EIGEN_VECTORIZE_AVX512BW guard, unlike the float overloads -- confirm
+                                         _mm512_packs_epi32(c.val, d.val));
+  return converted;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet32q16i
+pcast<Packet16q32i, Packet32q16i>(const Packet16q32i& a,
+                                  const Packet16q32i& b) {
+  __m512i converted = _mm512_packs_epi32(a.val, b.val);  // signed saturation to int16; also unguarded AVX512BW
+  return converted;
+}
+
+template <>
+struct type_casting_traits<QInt32, QUInt8> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet64q8u
+pcast<Packet16q32i, Packet64q8u>(const Packet16q32i& a, const Packet16q32i& b,
+                                 const Packet16q32i& c, const Packet16q32i& d) {
+  const __m512i converted = _mm512_packus_epi16(  // packus_epi16 reads SIGNED int16, so the 32->16 step must saturate signed: an unsigned 0x8000..0xFFFF would be seen as negative and clamp to 0 instead of 255
+      _mm512_packs_epi32(a.val, b.val), _mm512_packs_epi32(c.val, d.val));
+  return converted;
+}
+
+template <>
+struct type_casting_traits<QInt32, QUInt16> {
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+#if 0  // disabled: presumably because no Packet32q16u type is defined -- confirm before enabling
+template <>
+EIGEN_STRONG_INLINE Packet32q16u
+pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
+                                  const Packet16q32i& b) {
+  const __m512i converted = _mm512_packus_epi32(a.val, b.val);
+  return converted;
+}
+#endif
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
diff --git a/third_party/jpeg.BUILD b/third_party/jpeg.BUILD
deleted file mode 100644
index cbc1e86e51..0000000000
--- a/third_party/jpeg.BUILD
+++ /dev/null
@@ -1,416 +0,0 @@
-# Description:
-# libjpeg-turbo is a drop in replacement for jpeglib optimized with SIMD.
- -licenses(["notice"]) # custom notice-style license, see LICENSE.md - -exports_files(["LICENSE.md"]) - -libjpegturbo_nocopts = "-[W]error" - -libjpegturbo_copts = select({ - ":android": [ - "-O2", - "-fPIE", - "-w", - ], - ":windows": [ - "/Ox", - "/w14711", # function 'function' selected for inline expansion - "/w14710", # 'function' : function not inlined - ], - "//conditions:default": [ - "-O3", - "-w", - ], -}) + select({ - ":armeabi-v7a": [ - "-D__ARM_NEON__", - "-march=armv7-a", - "-mfloat-abi=softfp", - "-fprefetch-loop-arrays", - ], - "//conditions:default": [], -}) - -cc_library( - name = "jpeg", - srcs = [ - "jaricom.c", - "jcapimin.c", - "jcapistd.c", - "jcarith.c", - "jccoefct.c", - "jccolor.c", - "jcdctmgr.c", - "jchuff.c", - "jchuff.h", - "jcinit.c", - "jcmainct.c", - "jcmarker.c", - "jcmaster.c", - "jcomapi.c", - "jconfig.h", - "jconfigint.h", - "jcparam.c", - "jcphuff.c", - "jcprepct.c", - "jcsample.c", - "jctrans.c", - "jdapimin.c", - "jdapistd.c", - "jdarith.c", - "jdatadst.c", - "jdatasrc.c", - "jdcoefct.c", - "jdcoefct.h", - "jdcolor.c", - "jdct.h", - "jddctmgr.c", - "jdhuff.c", - "jdhuff.h", - "jdinput.c", - "jdmainct.c", - "jdmainct.h", - "jdmarker.c", - "jdmaster.c", - "jdmaster.h", - "jdmerge.c", - "jdphuff.c", - "jdpostct.c", - "jdsample.c", - "jdsample.h", - "jdtrans.c", - "jerror.c", - "jfdctflt.c", - "jfdctfst.c", - "jfdctint.c", - "jidctflt.c", - "jidctfst.c", - "jidctint.c", - "jidctred.c", - "jinclude.h", - "jmemmgr.c", - "jmemnobs.c", - "jmemsys.h", - "jpeg_nbits_table.h", - "jpegcomp.h", - "jquant1.c", - "jquant2.c", - "jutils.c", - "jversion.h", - ], - hdrs = [ - "jccolext.c", # should have been named .inc - "jdcol565.c", # should have been named .inc - "jdcolext.c", # should have been named .inc - "jdmrg565.c", # should have been named .inc - "jdmrgext.c", # should have been named .inc - "jerror.h", - "jmorecfg.h", - "jpegint.h", - "jpeglib.h", - "jstdhuff.c", # should have been named .inc - ], - copts = libjpegturbo_copts, - 
nocopts = libjpegturbo_nocopts, - visibility = ["//visibility:public"], - deps = select({ - ":k8": [":simd_x86_64"], - ":armeabi-v7a": [":simd_armv7a"], - ":arm64-v8a": [":simd_armv8a"], - "//conditions:default": [":simd_none"], - }), -) - -cc_library( - name = "simd_x86_64", - srcs = [ - "jchuff.h", - "jconfig.h", - "jdct.h", - "jerror.h", - "jinclude.h", - "jmorecfg.h", - "jpegint.h", - "jpeglib.h", - "jsimd.h", - "jsimddct.h", - "simd/jccolor-sse2-64.o", - "simd/jcgray-sse2-64.o", - "simd/jchuff-sse2-64.o", - "simd/jcsample-sse2-64.o", - "simd/jdcolor-sse2-64.o", - "simd/jdmerge-sse2-64.o", - "simd/jdsample-sse2-64.o", - "simd/jfdctflt-sse-64.o", - "simd/jfdctfst-sse2-64.o", - "simd/jfdctint-sse2-64.o", - "simd/jidctflt-sse2-64.o", - "simd/jidctfst-sse2-64.o", - "simd/jidctint-sse2-64.o", - "simd/jidctred-sse2-64.o", - "simd/jquantf-sse2-64.o", - "simd/jquanti-sse2-64.o", - "simd/jsimd.h", - "simd/jsimd_x86_64.c", - ], - copts = libjpegturbo_copts, - linkstatic = 1, - nocopts = libjpegturbo_nocopts, -) - -genrule( - name = "simd_x86_64_assemblage23", - srcs = [ - "simd/jccolext-sse2-64.asm", - "simd/jccolor-sse2-64.asm", - "simd/jcgray-sse2-64.asm", - "simd/jcgryext-sse2-64.asm", - "simd/jchuff-sse2-64.asm", - "simd/jcolsamp.inc", - "simd/jcsample-sse2-64.asm", - "simd/jdcolext-sse2-64.asm", - "simd/jdcolor-sse2-64.asm", - "simd/jdct.inc", - "simd/jdmerge-sse2-64.asm", - "simd/jdmrgext-sse2-64.asm", - "simd/jdsample-sse2-64.asm", - "simd/jfdctflt-sse-64.asm", - "simd/jfdctfst-sse2-64.asm", - "simd/jfdctint-sse2-64.asm", - "simd/jidctflt-sse2-64.asm", - "simd/jidctfst-sse2-64.asm", - "simd/jidctint-sse2-64.asm", - "simd/jidctred-sse2-64.asm", - "simd/jpeg_nbits_table.inc", - "simd/jquantf-sse2-64.asm", - "simd/jquanti-sse2-64.asm", - "simd/jsimdcfg.inc", - "simd/jsimdext.inc", - ], - outs = [ - "simd/jccolor-sse2-64.o", - "simd/jcgray-sse2-64.o", - "simd/jchuff-sse2-64.o", - "simd/jcsample-sse2-64.o", - "simd/jdcolor-sse2-64.o", - "simd/jdmerge-sse2-64.o", - 
"simd/jdsample-sse2-64.o", - "simd/jfdctflt-sse-64.o", - "simd/jfdctfst-sse2-64.o", - "simd/jfdctint-sse2-64.o", - "simd/jidctflt-sse2-64.o", - "simd/jidctfst-sse2-64.o", - "simd/jidctint-sse2-64.o", - "simd/jidctred-sse2-64.o", - "simd/jquantf-sse2-64.o", - "simd/jquanti-sse2-64.o", - ], - cmd = "for out in $(OUTS); do\n" + - " $(location @nasm//:nasm) -f elf64" + - " -DELF -DPIC -DRGBX_FILLER_0XFF -D__x86_64__ -DARCH_X86_64" + - " -I $$(dirname $(location simd/jdct.inc))/" + - " -I $$(dirname $(location simd/jsimdcfg.inc))/" + - " -o $$out" + - " $$(dirname $(location simd/jdct.inc))/$$(basename $${out%.o}.asm)\n" + - "done", - tools = ["@nasm//:nasm"], -) - -cc_library( - name = "simd_armv7a", - srcs = [ - "jchuff.h", - "jconfig.h", - "jdct.h", - "jinclude.h", - "jmorecfg.h", - "jpeglib.h", - "jsimd.h", - "jsimddct.h", - "simd/jsimd.h", - "simd/jsimd_arm.c", - "simd/jsimd_arm_neon.S", - ], - copts = libjpegturbo_copts, - nocopts = libjpegturbo_nocopts, -) - -cc_library( - name = "simd_armv8a", - srcs = [ - "jchuff.h", - "jconfig.h", - "jdct.h", - "jinclude.h", - "jmorecfg.h", - "jpeglib.h", - "jsimd.h", - "jsimddct.h", - "simd/jsimd.h", - "simd/jsimd_arm64.c", - "simd/jsimd_arm64_neon.S", - ], - copts = libjpegturbo_copts, - nocopts = libjpegturbo_nocopts, -) - -cc_library( - name = "simd_none", - srcs = [ - "jchuff.h", - "jconfig.h", - "jdct.h", - "jerror.h", - "jinclude.h", - "jmorecfg.h", - "jpegint.h", - "jpeglib.h", - "jsimd.h", - "jsimd_none.c", - "jsimddct.h", - ], - copts = libjpegturbo_copts, - nocopts = libjpegturbo_nocopts, -) - -genrule( - name = "configure", - outs = ["jconfig.h"], - cmd = "cat <<'EOF' >$@\n" + - "#define JPEG_LIB_VERSION 62\n" + - "#define LIBJPEG_TURBO_VERSION 1.5.1\n" + - "#define LIBJPEG_TURBO_VERSION_NUMBER 1005001\n" + - "#define C_ARITH_CODING_SUPPORTED 1\n" + - "#define D_ARITH_CODING_SUPPORTED 1\n" + - "#define BITS_IN_JSAMPLE 8\n" + - "#define HAVE_LOCALE_H 1\n" + - "#define HAVE_STDDEF_H 1\n" + - "#define HAVE_STDLIB_H 
1\n" + - "#define HAVE_UNSIGNED_CHAR 1\n" + - "#define HAVE_UNSIGNED_SHORT 1\n" + - "#define MEM_SRCDST_SUPPORTED 1\n" + - "#define NEED_SYS_TYPES_H 1\n" + - select({ - ":k8": "#define WITH_SIMD 1\n", - ":armeabi-v7a": "#define WITH_SIMD 1\n", - ":arm64-v8a": "#define WITH_SIMD 1\n", - "//conditions:default": "", - }) + - "EOF", -) - -genrule( - name = "configure_internal", - outs = ["jconfigint.h"], - cmd = "cat <<'EOF' >$@\n" + - "#define BUILD \"20161115\"\n" + - "#ifdef _MSC_VER /* Windows */\n" + - "#define INLINE __inline\n" + - "#else\n" + - "#define INLINE inline __attribute__((always_inline))\n" + - "#endif\n" + - "#define PACKAGE_NAME \"libjpeg-turbo\"\n" + - "#define VERSION \"1.5.1\"\n" + - "#if (__WORDSIZE==64 && !defined(__native_client__)) || defined(_WIN64)\n" + - "#define SIZEOF_SIZE_T 8\n" + - "#else\n" + - "#define SIZEOF_SIZE_T 4\n" + - "#endif\n" + - "EOF", -) - -# jiminy cricket the way this file is generated is completely outrageous -genrule( - name = "configure_simd", - outs = ["simd/jsimdcfg.inc"], - cmd = "cat <<'EOF' >$@\n" + - "%define DCTSIZE 8\n" + - "%define DCTSIZE2 64\n" + - "%define RGB_RED 0\n" + - "%define RGB_GREEN 1\n" + - "%define RGB_BLUE 2\n" + - "%define RGB_PIXELSIZE 3\n" + - "%define EXT_RGB_RED 0\n" + - "%define EXT_RGB_GREEN 1\n" + - "%define EXT_RGB_BLUE 2\n" + - "%define EXT_RGB_PIXELSIZE 3\n" + - "%define EXT_RGBX_RED 0\n" + - "%define EXT_RGBX_GREEN 1\n" + - "%define EXT_RGBX_BLUE 2\n" + - "%define EXT_RGBX_PIXELSIZE 4\n" + - "%define EXT_BGR_RED 2\n" + - "%define EXT_BGR_GREEN 1\n" + - "%define EXT_BGR_BLUE 0\n" + - "%define EXT_BGR_PIXELSIZE 3\n" + - "%define EXT_BGRX_RED 2\n" + - "%define EXT_BGRX_GREEN 1\n" + - "%define EXT_BGRX_BLUE 0\n" + - "%define EXT_BGRX_PIXELSIZE 4\n" + - "%define EXT_XBGR_RED 3\n" + - "%define EXT_XBGR_GREEN 2\n" + - "%define EXT_XBGR_BLUE 1\n" + - "%define EXT_XBGR_PIXELSIZE 4\n" + - "%define EXT_XRGB_RED 1\n" + - "%define EXT_XRGB_GREEN 2\n" + - "%define EXT_XRGB_BLUE 3\n" + - "%define 
EXT_XRGB_PIXELSIZE 4\n" + - "%define RGBX_FILLER_0XFF 1\n" + - "%define JSAMPLE byte ; unsigned char\n" + - "%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)\n" + - "%define CENTERJSAMPLE 128\n" + - "%define JCOEF word ; short\n" + - "%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)\n" + - "%define JDIMENSION dword ; unsigned int\n" + - "%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)\n" + - "%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)\n" + - "%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)\n" + - "%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)\n" + - "%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)\n" + - "%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)\n" + - "%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)\n" + - "%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)\n" + - "%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)\n" + - "%define DCTELEM word ; short\n" + - "%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)\n" + - "%define float FP32 ; float\n" + - "%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)\n" + - "%define ISLOW_MULT_TYPE word ; must be short\n" + - "%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)\n" + - "%define IFAST_MULT_TYPE word ; must be short\n" + - "%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)\n" + - "%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors\n" + - "%define FLOAT_MULT_TYPE FP32 ; must be float\n" + - "%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)\n" + - "%define JSIMD_NONE 0x00\n" + - "%define JSIMD_MMX 0x01\n" + - "%define JSIMD_3DNOW 0x02\n" + - "%define JSIMD_SSE 0x04\n" + - "%define JSIMD_SSE2 0x08\n" + - "EOF", -) - -config_setting( - name = "k8", - values = {"cpu": "k8"}, -) - -config_setting( - name = "android", - values = {"crosstool_top": "//external:android/crosstool"}, -) - -config_setting( - name = "armeabi-v7a", - values = {"android_cpu": "armeabi-v7a"}, -) - 
-config_setting( - name = "arm64-v8a", - values = {"android_cpu": "arm64-v8a"}, -) - -config_setting( - name = "windows", - values = {"cpu": "x64_windows_msvc"}, -) diff --git a/third_party/sycl/crosstool/BUILD b/third_party/sycl/crosstool/BUILD new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/third_party/sycl/crosstool/BUILD @@ -0,0 +1 @@ + diff --git a/third_party/sycl/sycl/LICENSE.text.tpl b/third_party/sycl/sycl/LICENSE.text.tpl deleted file mode 100644 index 0c2955c4d7..0000000000 --- a/third_party/sycl/sycl/LICENSE.text.tpl +++ /dev/null @@ -1,268 +0,0 @@ - ---------------------------------------------------------------------- - -SOFTWARE LICENSE AGREEMENT - ---------------------------------------------------------------------- ---------------------------------------------------------------------- - -By downloading, installing, copying, or otherwise using the -ComputeCpp Community Edition software, including any associated -components, media, printed materials, and electronic documentation -("Software"), the user agrees to the following terms and conditions -of this Software License Agreement ("Agreement"). Please read the -terms of this Agreement carefully before beginning your download, as -pressing the "I AGREE" button at the end of this Agreement will -confirm your assent. If you do not agree to these terms, then -Codeplay Software Limited is unwilling to license the Software to -you; so please press the "CANCEL" button to cancel your download. - - 1. License. Codeplay Software Ltd., a company incorporated in - England and Wales with registered number 04567874 and having its - registered office at Regent House, 316 Beulah Hill, London, - United Kingdom, SE19 3HF ("Codeplay") hereby grants the user, - free of charge, a non-exclusive worldwide license to use and - replicate (but not modify) the Software for any use, whether - commercial or non-commercial, in accordance with this Agreement. 
- Codeplay reserves all rights to the Software that are not - expressly granted by this Agreement. - 2. Redistribution. The user may copy and redistribute unmodified - copies of only those components of the Software which are - specified below ("Redistributable Components"), in object code - form, as part of the user’s software applications or libraries - ("Applications"). The user acknowledges and agrees that it has no - right to modify the Redistributable Components in any way. Any - use of the Redistributable Components within the user’s - Applications will continue to be subject to the terms and - conditions of this Agreement, and the user must also distribute a - copy of this Agreement and reproduce and include all notices of - copyrights or other proprietary rights in the Software. The - user’s redistribution of the Redistributable Components will not - entitle it to any payment from Codeplay. The user may not - transfer any of its rights or obligations under this Agreement. - -+-------------------------------------------+ -|Redistributable Component|File Name | -|-------------------------+-----------------| -|Runtime (for Linux) |libComputeCpp.so | -|-------------------------+-----------------| -|Runtime (for Windows) |libComputeCpp.dll| -+-------------------------------------------+ - - 3. Restrictions. The user shall not: - - a. circumvent or bypass any technological protection measures in - or relating to the Software; - b. use the Software to perform any unauthorized transfer of - information or for any illegal purpose; - c. de-compile, decrypt, disassemble, hack, emulate, exploit or - reverse-engineer the Software (other than to the limited - extent permitted by law); - d. copy or redistribute any components of the Software that are - not listed in the table of Redistributable Components; - e. publish, rent, lease, sell, export, import, or lend the - Software; - f. 
represent in any way that it is selling the Software itself - or any license to use the Software, nor refer to Codeplay or - ComputeCpp within its marketing materials, without the - express prior written permission of Codeplay. - 4. Support. Codeplay does not provide any guarantees of support for - the Software to the user. Codeplay will use reasonable endeavours - to respond to users' support requests, for the most recent - release only, via the community support website at https:// - computecpp.codeplay.com. - 5. Intellectual Property. The Software is owned by Codeplay or its - licensors, and is protected by the copyright laws of the United - Kingdom and other countries and international treaty provisions. - Codeplay (and/or its licensors, as the case may be) retains all - copyrights, trade secrets and other proprietary rights in the - Software, including the rights to make and license the use of all - copies. To the extent that any patents owned by Codeplay or its - licensors relate to any component of the Software, the licence - granted to the user in accordance with this Agreement allows for - the lawful use of such patents but only for the purposes of this - Agreement and not further or otherwise. Therefore, the user may - make no copies of the Software, or the written materials that - accompany the Software, or reproduce it in any way, except as set - forth above. - 6. Terms. This Agreement is effective until terminated. Codeplay or - the user may terminate it immediately at any time. Any violation - of the terms of this Agreement by the user will result in - immediate termination by Codeplay. Upon termination, the user - must return or destroy the Software and accompanying materials - and notify Codeplay of its actions by email to info@codeplay.com. - 7. NO WARRANTIES. Codeplay expressly disclaims any warranty for the - Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF - ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE - WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE - AND NON-INFRINGEMENT. IN NO EVENT SHALL CODEPLAY BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - CONTRACT, DELICT OR TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. In particular, Codeplay provides no guarantees of - application performance on the target hardware. - 8. General. The invalidity of any portion or provision of this - Agreement shall not affect any other portions or provisions. This - Agreement shall be governed by the laws of Scotland. This - Agreement is the complete and exclusive agreement between the - user and Codeplay regarding the Software, and it supersedes any - prior agreement, oral or written, and any other communication - between the user and Codeplay relating to the subject matter of - the Agreement. Any amendment or modification of this Agreement - must be in writing and signed by both parties. If the user does - not agree to the terms of this Agreement, the user must not - install or use the Software. - 9. Third Party Licenses. The following licenses are for third-party - components included in the software. - - a. License for Clang/LLVM compiler technology components: - -============================================================================== - -LLVM Release License - -============================================================================== - -University of Illinois/NCSA - -Open Source License - -Copyright (c) 2007-2014 University of Illinois at Urbana-Champaign. - -All rights reserved. 
- -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== - - b. License for OpenBSD regex components: - -$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. 
-This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. - -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)COPYRIGHT8.1 (Berkeley) 3/16/94 - */ - - c. License for MD5 components: - -/* - * This code is derived from (original license follows): - * - * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. - * MD5 Message-Digest Algorithm (RFC 1321). - * - * Homepage: - * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 - * - * Author: - * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> - * - * This software was written by Alexander Peslyak in 2001. No copyright is - * claimed, and the software is hereby placed in the public domain. - * In case this attempt to disclaim copyright and place the software in the - * public domain is deemed null and void, then the software is - * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the - * general public under the following terms: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * There's ABSOLUTELY NO WARRANTY, express or implied. - * - * (This is a heavily cut-down "BSD license".) 
- * - * This differs from Colin Plumb's older public domain implementation in that - * no exactly 32-bit integer data type is required (any 32-bit or wider - * unsigned integer data type will do), there's no compile-time endianness - * configuration, and the function prototypes match OpenSSL's. No code from - * Colin Plumb's implementation has been reused; this comment merely compares - * the properties of the two independent implementations. - * - * The primary goals of this implementation are portability and ease of use. - * It is meant to be fast, but not as fast as possible. Some known - * optimizations are not included to reduce source code size and avoid - * compile-time configuration. - */ - - |