diff options
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/TypeCasting.h | 48 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/Default/BFloat16.h | 27 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 5 | ||||
-rw-r--r-- | test/main.h | 3 | ||||
-rw-r--r-- | test/packetmath.cpp | 81 | ||||
-rw-r--r-- | test/packetmath_test_shared.h | 8 |
8 files changed, 114 insertions, 70 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index cf7146cbc..d5dc6a174 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -1205,6 +1205,11 @@ EIGEN_STRONG_INLINE Packet8bf pmax<Packet8bf>(const Packet8bf& a, return F32ToBf16(pmax<Packet8f>(Bf16ToF32(a), Bf16ToF32(b))); } +template <> +EIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) { + return F32ToBf16(plset<Packet8f>(static_cast<float>(a))); +} + template<> EIGEN_STRONG_INLINE Packet8bf por(const Packet8bf& a,const Packet8bf& b) { return _mm_or_si128(a,b); } diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h index c669a7f60..d507fb67b 100644 --- a/Eigen/src/Core/arch/AVX/TypeCasting.h +++ b/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -35,23 +35,6 @@ struct type_casting_traits<int, float> { }; - -template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) { - return _mm256_cvttps_epi32(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) { - return _mm256_cvtepi32_ps(a); -} - -template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) { - return _mm256_castps_si256(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) { - return _mm256_castsi256_ps(a); -} - #ifndef EIGEN_VECTORIZE_AVX512 template <> @@ -63,9 +46,6 @@ struct type_casting_traits<Eigen::half, float> { }; }; -template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) { - return half2float(a); -} template <> struct type_casting_traits<float, Eigen::half> { @@ -85,10 +65,6 @@ struct type_casting_traits<bfloat16, float> { }; }; -template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) { - return Bf16ToF32(a); -} - template <> struct type_casting_traits<float, bfloat16> { enum { @@ -100,6 +76,30 @@ struct type_casting_traits<float, bfloat16> { #endif // EIGEN_VECTORIZE_AVX512 +template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) { + return _mm256_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) { + return _mm256_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) { + return _mm256_castps_si256(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) { + return _mm256_castsi256_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) { + return half2float(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) { + return Bf16ToF32(a); +} + template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) { return float2half(a); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 76f3366d7..8b946b3e1 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -1626,8 +1626,6 @@ template <> struct is_arithmetic<Packet16bf> { enum { value = true }; }; template <> struct packet_traits<bfloat16> : default_packet_traits { typedef Packet16bf type; - // There is no half-size packet for current Packet16bf. - // TODO: support as SSE path. typedef Packet8bf half; enum { Vectorizable = 1, @@ -1884,6 +1882,11 @@ EIGEN_STRONG_INLINE Packet16bf pmax<Packet16bf>(const Packet16bf& a, } template <> +EIGEN_STRONG_INLINE Packet16bf plset<Packet16bf>(const bfloat16& a) { + return F32ToBf16(plset<Packet16f>(static_cast<float>(a))); +} + +template <> EIGEN_STRONG_INLINE Packet8bf predux_half_dowto4<Packet16bf>(const Packet16bf& a) { Packet8bf lane0 = _mm256_extractf128_si256(a, 0); Packet8bf lane1 = _mm256_extractf128_si256(a, 1); diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 7c147ae34..4d5fa1bf8 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -103,8 +103,8 @@ struct numeric_limits<Eigen::bfloat16> { static const bool has_infinity = true; static const bool has_quiet_NaN = true; static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = numeric_limits<float>::has_denorm; - static const bool has_denorm_loss = numeric_limits<float>::has_denorm_loss; + static const float_denorm_style has_denorm = std::denorm_absent; + static const bool has_denorm_loss = false; static const std::float_round_style round_style = numeric_limits<float>::round_style; static const bool is_iec559 = false; static const bool is_bounded = true; @@ -551,18 +551,24 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { } #if EIGEN_HAS_CXX11_MATH EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { - return bfloat16(::asinh(float(a))); + EIGEN_USING_STD(asinhf); + return bfloat16(asinhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { - return bfloat16(::acosh(float(a))); + EIGEN_USING_STD(acoshf); + return bfloat16(acoshf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { - return bfloat16(::atanh(float(a))); + EIGEN_USING_STD(atanhf); + return bfloat16(atanhf(float(a))); } #endif EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) { + return bfloat16(::rintf(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) { return bfloat16(::ceilf(float(a))); } @@ -581,6 +587,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bf return f1 < f2 ? b : a; } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) { + const float f1 = static_cast<float>(a); + const float f2 = static_cast<float>(b); + return bfloat16(::fminf(f1, f2)); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) { + const float f1 = static_cast<float>(a); + const float f2 = static_cast<float>(b); + return bfloat16(::fmaxf(f1, f2)); +} + #ifndef EIGEN_NO_IO EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) { os << static_cast<float>(v); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 1246abeaa..6dbae8cee 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3373,6 +3373,11 @@ template <> EIGEN_STRONG_INLINE Packet4bf pmax<Packet4bf>(const Packet4bf &a, return F32ToBf16(pmax<Packet4f>(Bf16ToF32(a), Bf16ToF32(b))); } +template<> EIGEN_STRONG_INLINE Packet4bf plset<Packet4bf>(const bfloat16& a) +{ + return F32ToBf16(plset<Packet4f>(static_cast<float>(a))); +} + template<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) { return por<Packet4us>(a, b); } diff --git a/test/main.h b/test/main.h index 19e6f959d..e830d68b0 100644 --- a/test/main.h +++ b/test/main.h @@ -1,3 +1,4 @@ + // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // @@ -540,7 +541,7 @@ template<typename T1,typename T2> typename NumTraits<typename NumTraits<T1>::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, typename internal::enable_if<internal::is_arithmetic<typename NumTraits<T1>::Real>::value, T1>::type* = 0) { typedef typename NumTraits<typename NumTraits<T1>::Real>::NonInteger RealScalar; - return numext::sqrt(RealScalar(numext::abs2(a-b))/RealScalar((numext::mini)(numext::abs2(a),numext::abs2(b)))); + return numext::sqrt(RealScalar(numext::abs2(a-b))/(numext::mini)(RealScalar(numext::abs2(a)),RealScalar(numext::abs2(b)))); } template<typename T> diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 6cde7e87b..53c41c967 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -498,18 +498,18 @@ void packetmath_real() { EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4]; for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6)); - data2[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6)); + data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6))); + data2[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6))); } - if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = 0; + if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = Scalar(0); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); - CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1) / std::sqrt, internal::prsqrt); + CHECK_CWISE1_IF(PacketTraits::HasRsqrt, 1 / std::sqrt, internal::prsqrt); for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3)); - data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3)); + data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3))); + data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3))); } CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); @@ -522,42 +522,49 @@ void packetmath_real() { // See bug 1785. for (int i = 0; i < size; ++i) { - data1[i] = -1.5 + i; - data2[i] = -1.5 + i; + data1[i] = Scalar(-1.5 + i); + data2[i] = Scalar(-1.5 + i); } CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print); for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(-1, 1); - data2[i] = internal::random<Scalar>(-1, 1); + data1[i] = Scalar(internal::random<double>(-1, 1)); + data2[i] = Scalar(internal::random<double>(-1, 1)); } CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin); CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos); for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(-87, 88); - data2[i] = internal::random<Scalar>(-87, 88); + data1[i] = Scalar(internal::random<double>(-87, 88)); + data2[i] = Scalar(internal::random<double>(-87, 88)); } CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp); for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6)); - data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6)); + data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6))); + data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6))); } - data1[0] = 1e-20; + data1[0] = Scalar(1e-20); CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh); if (PacketTraits::HasExp && PacketSize >= 2) { + const Scalar small = std::numeric_limits<Scalar>::epsilon(); data1[0] = std::numeric_limits<Scalar>::quiet_NaN(); - data1[1] = std::numeric_limits<Scalar>::epsilon(); + data1[1] = small; test::packet_helper<PacketTraits::HasExp, Packet> h; h.store(data2, internal::pexp(h.load(data1))); VERIFY((numext::isnan)(data2[0])); - VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]); + // TODO(rmlarsen): Re-enable for bfloat16. + if (!internal::is_same<Scalar, bfloat16>::value) { + VERIFY_IS_EQUAL(std::exp(small), data2[1]); + } - data1[0] = -std::numeric_limits<Scalar>::epsilon(); - data1[1] = 0; + data1[0] = -small; + data1[1] = Scalar(0); h.store(data2, internal::pexp(h.load(data1))); - VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]); + // TODO(rmlarsen): Re-enable for bfloat16. + if (!internal::is_same<Scalar, bfloat16>::value) { + VERIFY_IS_EQUAL(std::exp(-small), data2[0]); + } VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits<Scalar>::min)(); @@ -584,7 +591,7 @@ void packetmath_real() { if (PacketTraits::HasExp) { internal::scalar_logistic_op<Scalar> logistic; for (int i = 0; i < size; ++i) { - data1[i] = internal::random<Scalar>(-20, 20); + data1[i] = Scalar(internal::random<double>(-20, 20)); } test::packet_helper<PacketTraits::HasExp, Packet> h; @@ -613,7 +620,7 @@ void packetmath_real() { VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]); data1[0] = -std::numeric_limits<Scalar>::epsilon(); - data1[1] = 0; + data1[1] = Scalar(0); h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); @@ -630,7 +637,8 @@ void packetmath_real() { data1[0] = std::numeric_limits<Scalar>::denorm_min(); data1[1] = -std::numeric_limits<Scalar>::denorm_min(); h.store(data2, internal::plog(h.load(data1))); - // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]); + // TODO(rmlarsen): Reenable. + // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]); VERIFY((numext::isnan)(data2[1])); } #endif @@ -654,17 +662,22 @@ void packetmath_real() { if (PacketTraits::HasSqrt) { test::packet_helper<PacketTraits::HasSqrt, Packet> h; data1[0] = Scalar(-1.0f); - data1[1] = -std::numeric_limits<Scalar>::denorm_min(); + if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) { + data1[1] = -std::numeric_limits<Scalar>::denorm_min(); + } else { + data1[1] = -std::numeric_limits<Scalar>::epsilon(); + } h.store(data2, internal::psqrt(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY((numext::isnan)(data2[1])); } - if (PacketTraits::HasCos) { + // TODO(rmlarsen): Re-enable for bfloat16. + if (PacketTraits::HasCos && !internal::is_same<Scalar, bfloat16>::value) { test::packet_helper<PacketTraits::HasCos, Packet> h; - for (Scalar k = 1; k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= 2) { + for (Scalar k = Scalar(1); k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= Scalar(2)) { for (int k1 = 0; k1 <= 1; ++k1) { - data1[0] = (2 * k + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2); - data1[1] = (2 * k + 2 + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2); + data1[0] = Scalar((2 * k + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2)); + data1[1] = Scalar((2 * k + 2 + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2)); h.store(data2, internal::pcos(h.load(data1))); h.store(data2 + PacketSize, internal::psin(h.load(data1))); VERIFY(data2[0] <= Scalar(1.) && data2[0] >= Scalar(-1.)); @@ -765,16 +778,16 @@ void packetmath_real<bfloat16, typename internal::packet_traits<bfloat16>::type> template <typename Scalar> Scalar propagate_nan_max(const Scalar& a, const Scalar& b) { - if ((std::isnan)(a)) return a; - if ((std::isnan)(b)) return b; - return (std::max)(a,b); + if ((numext::isnan)(a)) return a; + if ((numext::isnan)(b)) return b; + return (numext::maxi)(a,b); } template <typename Scalar> Scalar propagate_nan_min(const Scalar& a, const Scalar& b) { - if ((std::isnan)(a)) return a; - if ((std::isnan)(b)) return b; - return (std::min)(a,b); + if ((numext::isnan)(a)) return a; + if ((numext::isnan)(b)) return b; + return (numext::mini)(a,b); } template <typename Scalar, typename Packet> diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h index 7b8caedcb..f8dc3711c 100644 --- a/test/packetmath_test_shared.h +++ b/test/packetmath_test_shared.h @@ -156,7 +156,7 @@ struct packet_helper<false,Packet> #define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \ test::packet_helper<COND,Packet> h; \ for (int i=0; i<PacketSize; ++i) \ - ref[i] = REFOP(data1[i]); \ + ref[i] = Scalar(REFOP(data1[i])); \ h.store(data2, POP(h.load(data1))); \ VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \ } @@ -164,7 +164,7 @@ struct packet_helper<false,Packet> #define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \ test::packet_helper<COND,Packet> h; \ for (int i=0; i<PacketSize; ++i) \ - ref[i] = REFOP(data1[i], data1[i+PacketSize]); \ + ref[i] = Scalar(REFOP(data1[i], data1[i+PacketSize])); \ h.store(data2, POP(h.load(data1),h.load(data1+PacketSize))); \ VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \ } @@ -172,8 +172,8 @@ struct packet_helper<false,Packet> #define CHECK_CWISE3_IF(COND, REFOP, POP) if (COND) { \ test::packet_helper<COND, Packet> h; \ for (int i = 0; i < PacketSize; ++i) \ - ref[i] = \ - REFOP(data1[i], data1[i + PacketSize], data1[i + 2 * PacketSize]); \ + ref[i] = Scalar(REFOP(data1[i], data1[i + PacketSize], \ + data1[i + 2 * PacketSize])); \ h.store(data2, POP(h.load(data1), h.load(data1 + PacketSize), \ h.load(data1 + 2 * PacketSize))); \ VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \ |