aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-10-09 20:05:49 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-10-09 20:05:49 +0000
commit4e4d3f32d168ed9ce09d950f099a60ddcd11240f (patch)
tree3e52ae5b43c238679f69f3caf4d908d4afb16f13
parent7a8d3d5b81cb528f7f084b63686ffb20494053f6 (diff)
Clean up packetmath tests and fix various bugs to make bfloat16 pass (almost) all packetmath tests with SSE, AVX, and AVX512.
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h5
-rw-r--r--Eigen/src/Core/arch/AVX/TypeCasting.h48
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h7
-rw-r--r--Eigen/src/Core/arch/Default/BFloat16.h27
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h5
-rw-r--r--test/main.h3
-rw-r--r--test/packetmath.cpp81
-rw-r--r--test/packetmath_test_shared.h8
8 files changed, 114 insertions, 70 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index cf7146cbc..d5dc6a174 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1205,6 +1205,11 @@ EIGEN_STRONG_INLINE Packet8bf pmax<Packet8bf>(const Packet8bf& a,
return F32ToBf16(pmax<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));
}
+template <>
+EIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) {
+ return F32ToBf16(plset<Packet8f>(static_cast<float>(a)));
+}
+
template<> EIGEN_STRONG_INLINE Packet8bf por(const Packet8bf& a,const Packet8bf& b) {
return _mm_or_si128(a,b);
}
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index c669a7f60..d507fb67b 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -35,23 +35,6 @@ struct type_casting_traits<int, float> {
};
-
-template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
- return _mm256_cvttps_epi32(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
- return _mm256_cvtepi32_ps(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {
- return _mm256_castps_si256(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) {
- return _mm256_castsi256_ps(a);
-}
-
#ifndef EIGEN_VECTORIZE_AVX512
template <>
@@ -63,9 +46,6 @@ struct type_casting_traits<Eigen::half, float> {
};
};
-template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
- return half2float(a);
-}
template <>
struct type_casting_traits<float, Eigen::half> {
@@ -85,10 +65,6 @@ struct type_casting_traits<bfloat16, float> {
};
};
-template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
- return Bf16ToF32(a);
-}
-
template <>
struct type_casting_traits<float, bfloat16> {
enum {
@@ -100,6 +76,30 @@ struct type_casting_traits<float, bfloat16> {
#endif // EIGEN_VECTORIZE_AVX512
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+ return _mm256_cvttps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+ return _mm256_cvtepi32_ps(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {
+ return _mm256_castps_si256(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) {
+ return _mm256_castsi256_ps(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
+ return half2float(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
+ return Bf16ToF32(a);
+}
+
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
return float2half(a);
}
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 76f3366d7..8b946b3e1 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -1626,8 +1626,6 @@ template <> struct is_arithmetic<Packet16bf> { enum { value = true }; };
template <>
struct packet_traits<bfloat16> : default_packet_traits {
typedef Packet16bf type;
- // There is no half-size packet for current Packet16bf.
- // TODO: support as SSE path.
typedef Packet8bf half;
enum {
Vectorizable = 1,
@@ -1884,6 +1882,11 @@ EIGEN_STRONG_INLINE Packet16bf pmax<Packet16bf>(const Packet16bf& a,
}
template <>
+EIGEN_STRONG_INLINE Packet16bf plset<Packet16bf>(const bfloat16& a) {
+ return F32ToBf16(plset<Packet16f>(static_cast<float>(a)));
+}
+
+template <>
EIGEN_STRONG_INLINE Packet8bf predux_half_dowto4<Packet16bf>(const Packet16bf& a) {
Packet8bf lane0 = _mm256_extractf128_si256(a, 0);
Packet8bf lane1 = _mm256_extractf128_si256(a, 1);
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index 7c147ae34..4d5fa1bf8 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -103,8 +103,8 @@ struct numeric_limits<Eigen::bfloat16> {
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
- static const float_denorm_style has_denorm = numeric_limits<float>::has_denorm;
- static const bool has_denorm_loss = numeric_limits<float>::has_denorm_loss;
+ static const float_denorm_style has_denorm = std::denorm_absent;
+ static const bool has_denorm_loss = false;
static const std::float_round_style round_style = numeric_limits<float>::round_style;
static const bool is_iec559 = false;
static const bool is_bounded = true;
@@ -551,18 +551,24 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
}
#if EIGEN_HAS_CXX11_MATH
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
- return bfloat16(::asinh(float(a)));
+ EIGEN_USING_STD(asinhf);
+ return bfloat16(asinhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
- return bfloat16(::acosh(float(a)));
+ EIGEN_USING_STD(acoshf);
+ return bfloat16(acoshf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
- return bfloat16(::atanh(float(a)));
+ EIGEN_USING_STD(atanhf);
+ return bfloat16(atanhf(float(a)));
}
#endif
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
return bfloat16(::floorf(float(a)));
}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
+ return bfloat16(::rintf(float(a)));
+}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
return bfloat16(::ceilf(float(a)));
}
@@ -581,6 +587,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bf
return f1 < f2 ? b : a;
}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
+ const float f1 = static_cast<float>(a);
+ const float f2 = static_cast<float>(b);
+ return bfloat16(::fminf(f1, f2));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
+ const float f1 = static_cast<float>(a);
+ const float f2 = static_cast<float>(b);
+ return bfloat16(::fmaxf(f1, f2));
+}
+
#ifndef EIGEN_NO_IO
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
os << static_cast<float>(v);
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 1246abeaa..6dbae8cee 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3373,6 +3373,11 @@ template <> EIGEN_STRONG_INLINE Packet4bf pmax<Packet4bf>(const Packet4bf &a,
return F32ToBf16(pmax<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
}
+template<> EIGEN_STRONG_INLINE Packet4bf plset<Packet4bf>(const bfloat16& a)
+{
+ return F32ToBf16(plset<Packet4f>(static_cast<float>(a)));
+}
+
template<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) {
return por<Packet4us>(a, b);
}
diff --git a/test/main.h b/test/main.h
index 19e6f959d..e830d68b0 100644
--- a/test/main.h
+++ b/test/main.h
@@ -1,3 +1,4 @@
+
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
@@ -540,7 +541,7 @@ template<typename T1,typename T2>
typename NumTraits<typename NumTraits<T1>::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, typename internal::enable_if<internal::is_arithmetic<typename NumTraits<T1>::Real>::value, T1>::type* = 0)
{
typedef typename NumTraits<typename NumTraits<T1>::Real>::NonInteger RealScalar;
- return numext::sqrt(RealScalar(numext::abs2(a-b))/RealScalar((numext::mini)(numext::abs2(a),numext::abs2(b))));
+ return numext::sqrt(RealScalar(numext::abs2(a-b))/(numext::mini)(RealScalar(numext::abs2(a)),RealScalar(numext::abs2(b))));
}
template<typename T>
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 6cde7e87b..53c41c967 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -498,18 +498,18 @@ void packetmath_real() {
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
- data2[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
+ data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
+ data2[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
}
- if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = 0;
+ if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = Scalar(0);
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
- CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1) / std::sqrt, internal::prsqrt);
+ CHECK_CWISE1_IF(PacketTraits::HasRsqrt, 1 / std::sqrt, internal::prsqrt);
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
- data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
+ data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
+ data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
}
CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
@@ -522,42 +522,49 @@ void packetmath_real() {
// See bug 1785.
for (int i = 0; i < size; ++i) {
- data1[i] = -1.5 + i;
- data2[i] = -1.5 + i;
+ data1[i] = Scalar(-1.5 + i);
+ data2[i] = Scalar(-1.5 + i);
}
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(-1, 1);
- data2[i] = internal::random<Scalar>(-1, 1);
+ data1[i] = Scalar(internal::random<double>(-1, 1));
+ data2[i] = Scalar(internal::random<double>(-1, 1));
}
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(-87, 88);
- data2[i] = internal::random<Scalar>(-87, 88);
+ data1[i] = Scalar(internal::random<double>(-87, 88));
+ data2[i] = Scalar(internal::random<double>(-87, 88));
}
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
- data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
+ data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
+ data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
}
- data1[0] = 1e-20;
+ data1[0] = Scalar(1e-20);
CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
if (PacketTraits::HasExp && PacketSize >= 2) {
+ const Scalar small = std::numeric_limits<Scalar>::epsilon();
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
- data1[1] = std::numeric_limits<Scalar>::epsilon();
+ data1[1] = small;
test::packet_helper<PacketTraits::HasExp, Packet> h;
h.store(data2, internal::pexp(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
- VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
+ // TODO(rmlarsen): Re-enable for bfloat16.
+ if (!internal::is_same<Scalar, bfloat16>::value) {
+ VERIFY_IS_EQUAL(std::exp(small), data2[1]);
+ }
- data1[0] = -std::numeric_limits<Scalar>::epsilon();
- data1[1] = 0;
+ data1[0] = -small;
+ data1[1] = Scalar(0);
h.store(data2, internal::pexp(h.load(data1)));
- VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
+ // TODO(rmlarsen): Re-enable for bfloat16.
+ if (!internal::is_same<Scalar, bfloat16>::value) {
+ VERIFY_IS_EQUAL(std::exp(-small), data2[0]);
+ }
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
data1[0] = (std::numeric_limits<Scalar>::min)();
@@ -584,7 +591,7 @@ void packetmath_real() {
if (PacketTraits::HasExp) {
internal::scalar_logistic_op<Scalar> logistic;
for (int i = 0; i < size; ++i) {
- data1[i] = internal::random<Scalar>(-20, 20);
+ data1[i] = Scalar(internal::random<double>(-20, 20));
}
test::packet_helper<PacketTraits::HasExp, Packet> h;
@@ -613,7 +620,7 @@ void packetmath_real() {
VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
data1[0] = -std::numeric_limits<Scalar>::epsilon();
- data1[1] = 0;
+ data1[1] = Scalar(0);
h.store(data2, internal::plog(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
@@ -630,7 +637,8 @@ void packetmath_real() {
data1[0] = std::numeric_limits<Scalar>::denorm_min();
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
h.store(data2, internal::plog(h.load(data1)));
- // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
+ // TODO(rmlarsen): Reenable.
+ // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
VERIFY((numext::isnan)(data2[1]));
}
#endif
@@ -654,17 +662,22 @@ void packetmath_real() {
if (PacketTraits::HasSqrt) {
test::packet_helper<PacketTraits::HasSqrt, Packet> h;
data1[0] = Scalar(-1.0f);
- data1[1] = -std::numeric_limits<Scalar>::denorm_min();
+ if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {
+ data1[1] = -std::numeric_limits<Scalar>::denorm_min();
+ } else {
+ data1[1] = -std::numeric_limits<Scalar>::epsilon();
+ }
h.store(data2, internal::psqrt(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
VERIFY((numext::isnan)(data2[1]));
}
- if (PacketTraits::HasCos) {
+ // TODO(rmlarsen): Re-enable for bfloat16.
+ if (PacketTraits::HasCos && !internal::is_same<Scalar, bfloat16>::value) {
test::packet_helper<PacketTraits::HasCos, Packet> h;
- for (Scalar k = 1; k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= 2) {
+ for (Scalar k = Scalar(1); k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= Scalar(2)) {
for (int k1 = 0; k1 <= 1; ++k1) {
- data1[0] = (2 * k + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
- data1[1] = (2 * k + 2 + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
+ data1[0] = Scalar((2 * k + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2));
+ data1[1] = Scalar((2 * k + 2 + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2));
h.store(data2, internal::pcos(h.load(data1)));
h.store(data2 + PacketSize, internal::psin(h.load(data1)));
VERIFY(data2[0] <= Scalar(1.) && data2[0] >= Scalar(-1.));
@@ -765,16 +778,16 @@ void packetmath_real<bfloat16, typename internal::packet_traits<bfloat16>::type>
template <typename Scalar>
Scalar propagate_nan_max(const Scalar& a, const Scalar& b) {
- if ((std::isnan)(a)) return a;
- if ((std::isnan)(b)) return b;
- return (std::max)(a,b);
+ if ((numext::isnan)(a)) return a;
+ if ((numext::isnan)(b)) return b;
+ return (numext::maxi)(a,b);
}
template <typename Scalar>
Scalar propagate_nan_min(const Scalar& a, const Scalar& b) {
- if ((std::isnan)(a)) return a;
- if ((std::isnan)(b)) return b;
- return (std::min)(a,b);
+ if ((numext::isnan)(a)) return a;
+ if ((numext::isnan)(b)) return b;
+ return (numext::mini)(a,b);
}
template <typename Scalar, typename Packet>
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h
index 7b8caedcb..f8dc3711c 100644
--- a/test/packetmath_test_shared.h
+++ b/test/packetmath_test_shared.h
@@ -156,7 +156,7 @@ struct packet_helper<false,Packet>
#define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \
test::packet_helper<COND,Packet> h; \
for (int i=0; i<PacketSize; ++i) \
- ref[i] = REFOP(data1[i]); \
+ ref[i] = Scalar(REFOP(data1[i])); \
h.store(data2, POP(h.load(data1))); \
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
}
@@ -164,7 +164,7 @@ struct packet_helper<false,Packet>
#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
test::packet_helper<COND,Packet> h; \
for (int i=0; i<PacketSize; ++i) \
- ref[i] = REFOP(data1[i], data1[i+PacketSize]); \
+ ref[i] = Scalar(REFOP(data1[i], data1[i+PacketSize])); \
h.store(data2, POP(h.load(data1),h.load(data1+PacketSize))); \
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
}
@@ -172,8 +172,8 @@ struct packet_helper<false,Packet>
#define CHECK_CWISE3_IF(COND, REFOP, POP) if (COND) { \
test::packet_helper<COND, Packet> h; \
for (int i = 0; i < PacketSize; ++i) \
- ref[i] = \
- REFOP(data1[i], data1[i + PacketSize], data1[i + 2 * PacketSize]); \
+ ref[i] = Scalar(REFOP(data1[i], data1[i + PacketSize], \
+ data1[i + 2 * PacketSize])); \
h.store(data2, POP(h.load(data1), h.load(data1 + PacketSize), \
h.load(data1 + 2 * PacketSize))); \
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \