aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/arch/Default/BFloat16.h10
-rw-r--r--test/bfloat16_float.cpp31
2 files changed, 26 insertions, 15 deletions
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index f9c6e76a9..30c998249 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -34,8 +34,9 @@ namespace Eigen {
struct bfloat16;
-// Since we allow implicit conversion of bfloat16 to float and double, we
-// need to make the cast to complex a bit more explicit
+// explicit conversion operators are no available before C++11 so we first cast
+// bfloat16 to RealScalar rather than to std::complex<RealScalar> directly
+#if !EIGEN_HAS_CXX11
namespace internal {
template <typename RealScalar>
struct cast_impl<bfloat16, std::complex<RealScalar> > {
@@ -45,6 +46,7 @@ struct cast_impl<bfloat16, std::complex<RealScalar> > {
}
};
} // namespace internal
+#endif // EIGEN_HAS_CXX11
namespace bfloat16_impl {
@@ -129,10 +131,10 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
return static_cast<unsigned long long>(bfloat16_to_float(*this));
}
- EIGEN_DEVICE_FUNC operator float() const {
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
return bfloat16_impl::bfloat16_to_float(*this);
}
- EIGEN_DEVICE_FUNC operator double() const {
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
return static_cast<double>(bfloat16_impl::bfloat16_to_float(*this));
}
template<typename RealScalar>
diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp
index 96341929a..11fc31363 100644
--- a/test/bfloat16_float.cpp
+++ b/test/bfloat16_float.cpp
@@ -41,6 +41,19 @@ void test_truncate(float input, float expected_truncation, float expected_roundi
VERIFY_IS_EQUAL(expected_rounding, static_cast<float>(rounded));
}
+template<typename T>
+ void test_roundtrip() {
+ // Representable T round trip via bfloat16
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(-std::numeric_limits<T>::infinity())), -std::numeric_limits<T>::infinity());
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(std::numeric_limits<T>::infinity())), std::numeric_limits<T>::infinity());
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(-1.0))), T(-1.0));
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(-0.5))), T(-0.5));
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(-0.0))), T(-0.0));
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(1.0))), T(1.0));
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(0.5))), T(0.5));
+ VERIFY_IS_EQUAL(static_cast<T>(static_cast<bfloat16>(T(0.0))), T(0.0));
+}
+
void test_conversion()
{
using Eigen::bfloat16_impl::__bfloat16_raw;
@@ -53,9 +66,9 @@ void test_conversion()
VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity.
// Verify round-to-nearest-even behavior.
- float val1 = bfloat16(__bfloat16_raw(0x3c00));
- float val2 = bfloat16(__bfloat16_raw(0x3c01));
- float val3 = bfloat16(__bfloat16_raw(0x3c02));
+ float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00)));
+ float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01)));
+ float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02)));
VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);
@@ -106,14 +119,10 @@ void test_conversion()
VERIFY_IS_EQUAL(static_cast<float>(bfloat16()), 0.0f);
// Representable floats round trip via bfloat16
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(-std::numeric_limits<float>::infinity())), -std::numeric_limits<float>::infinity());
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(std::numeric_limits<float>::infinity())), std::numeric_limits<float>::infinity());
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(-1.0f)), -1.0f);
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(-0.5f)), -0.5f);
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(-0.0f)), -0.0f);
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(1.0f)), 1.0f);
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(0.5f)), 0.5f);
- VERIFY_IS_EQUAL(static_cast<float>(static_cast<bfloat16>(0.0f)), 0.0f);
+ test_roundtrip<float>();
+ test_roundtrip<double>();
+ test_roundtrip<std::complex<float> >();
+ test_roundtrip<std::complex<double> >();
// Truncate test
test_truncate(