From 89f90b585d24b3c07946b4ffd8064e66ad5af94a Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 24 Nov 2020 16:28:07 -0800 Subject: AVX512 missing ops. This allows the `packetmath` tests to pass for AVX512 on skylake. Made `half` and `bfloat16` consistent in terms of ops they support. Note the `log` tests are currently disabled for `bfloat16` since they fail due to poor precision (they were previously disabled for `Packet8bf` via test function specialization -- I just removed that specialization and disabled it in the generic test). --- test/packetmath.cpp | 129 +++++++++++----------------------------------------- 1 file changed, 27 insertions(+), 102 deletions(-) (limited to 'test/packetmath.cpp') diff --git a/test/packetmath.cpp b/test/packetmath.cpp index d52f997dc..ae0ead820 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -618,7 +618,10 @@ void packetmath_real() { test::packet_helper h; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); - VERIFY_IS_APPROX(std::log(std::numeric_limits::epsilon()), data2[1]); + // TODO(cantonios): Re-enable for bfloat16. + if (!internal::is_same::value) { + VERIFY_IS_APPROX(std::log(data1[1]), data2[1]); + } data1[0] = -std::numeric_limits::epsilon(); data1[1] = Scalar(0); @@ -629,7 +632,10 @@ void packetmath_real() { data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); h.store(data2, internal::plog(h.load(data1))); - VERIFY_IS_APPROX(std::log((std::numeric_limits::min)()), data2[0]); + // TODO(cantonios): Re-enable for bfloat16. + if (!internal::is_same::value) { + VERIFY_IS_APPROX(std::log((std::numeric_limits::min)()), data2[0]); + } VERIFY((numext::isnan)(data2[1])); // Note: 32-bit arm always flushes denorms to zero. @@ -731,54 +737,6 @@ void packetmath_real() { VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \ } -template <> -void packetmath_real::type>(){ - typedef internal::packet_traits PacketTraits; - typedef internal::packet_traits::type Packet; - - const int PacketSize = internal::unpacket_traits::size; - const int size = PacketSize * 4; - EIGEN_ALIGN_MAX bfloat16 data1[PacketSize * 4]; - EIGEN_ALIGN_MAX bfloat16 data2[PacketSize * 4]; - EIGEN_ALIGN_MAX bfloat16 ref[PacketSize * 4]; - - for (int i = 0; i < size; ++i) { - data1[i] = bfloat16(internal::random(0, 1) * std::pow(float(10), internal::random(-6, 6))); - data2[i] = bfloat16(internal::random(0, 1) * std::pow(float(10), internal::random(-6, 6))); - data1[i] = bfloat16(0); - } - - if (internal::random(0, 1) < 0.1f) data1[internal::random(0, PacketSize)] = bfloat16(0); - - CAST_CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog, bfloat16, float); - CAST_CHECK_CWISE1_IF(PacketTraits::HasRsqrt, float(1) / std::sqrt, internal::prsqrt, bfloat16, float); - - for (int i = 0; i < size; ++i) { - data1[i] = bfloat16(internal::random(-1, 1) * std::pow(float(10), internal::random(-3, 3))); - data2[i] = bfloat16(internal::random(-1, 1) * std::pow(float(10), internal::random(-3, 3))); - } - CAST_CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin, bfloat16, float); - CAST_CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos, bfloat16, float); - CAST_CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan, bfloat16, float); - - CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float); - CAST_CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil, bfloat16, float); - CAST_CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor, bfloat16, float); - - for (int i = 0; i < size; ++i) { - data1[i] = bfloat16(-1.5 + i); - data2[i] = bfloat16(-1.5 + i); - } - CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float); - - for (int i = 0; i < size; ++i) { - data1[i] = bfloat16(internal::random(-87, 88)); - data2[i] = bfloat16(internal::random(-87, 88)); - } - CAST_CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp, bfloat16, float); - -} - template Scalar propagate_nan_max(const Scalar& a, const Scalar& b) { if ((numext::isnan)(a)) return a; @@ -793,6 +751,20 @@ Scalar propagate_nan_min(const Scalar& a, const Scalar& b) { return (numext::mini)(a,b); } +template +Scalar propagate_number_max(const Scalar& a, const Scalar& b) { + if ((numext::isnan)(a)) return b; + if ((numext::isnan)(b)) return a; + return (numext::maxi)(a,b); +} + +template +Scalar propagate_number_min(const Scalar& a, const Scalar& b) { + if ((numext::isnan)(a)) return b; + if ((numext::isnan)(b)) return a; + return (numext::mini)(a,b); +} + template void packetmath_notcomplex() { typedef internal::packet_traits PacketTraits; @@ -809,15 +781,9 @@ void packetmath_notcomplex() { CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); -#if EIGEN_HAS_CXX11_MATH - using std::fmin; - using std::fmax; -#else - using ::fmin; - using ::fmax; -#endif - CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, (internal::pmin)); - CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pmax); + + CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, internal::pmin); + CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax); CHECK_CWISE1(numext::abs, internal::pabs); CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff); @@ -890,54 +856,13 @@ void packetmath_notcomplex() { data1[i + PacketSize] = internal::random() ? std::numeric_limits::quiet_NaN() : Scalar(0); } // Note: NaN propagation is implementation defined for pmin/pmax, so we do not test it here. - CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, (internal::pmin)); - CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pmax); + CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, (internal::pmin)); + CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax); CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_nan_min, (internal::pmin)); CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_nan_max, internal::pmax); } } -template <> -void packetmath_notcomplex::type>(){ - typedef bfloat16 Scalar; - typedef internal::packet_traits::type Packet; - typedef internal::packet_traits PacketTraits; - const int PacketSize = internal::unpacket_traits::size; - - EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4]; - EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4]; - EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4]; - Array::Map(data1, PacketSize * 4).setRandom(); - - ref[0] = data1[0]; - for (int i = 0; i < PacketSize; ++i) ref[0] = (std::min)(ref[0], data1[i]); - VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload(data1))) && "internal::predux_min"); - - VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); - VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); - - CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); - CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); - CHECK_CWISE1(numext::abs, internal::pabs); - CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff); - - ref[0] = data1[0]; - for (int i = 0; i < PacketSize; ++i) ref[0] = (std::max)(ref[0], data1[i]); - VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload(data1))) && "internal::predux_max"); - - { - unsigned char* data1_bits = reinterpret_cast(data1); - // predux_any - for (unsigned int i = 0; i < PacketSize * sizeof(Scalar); ++i) data1_bits[i] = 0x0; - VERIFY((!internal::predux_any(internal::pload(data1))) && "internal::predux_any(0000)"); - for (int k = 0; k < PacketSize; ++k) { - for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0xff; - VERIFY(internal::predux_any(internal::pload(data1)) && "internal::predux_any(0101)"); - for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0x00; - } - } -} - template void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) { const int PacketSize = internal::unpacket_traits::size; -- cgit v1.2.3