diff options
author | 2020-09-18 17:38:58 +0000 | |
---|---|---|
committer | 2020-09-18 17:38:58 +0000 | |
commit | e55182ac09885d7558adf75e9e230b051a721c18 (patch) | |
tree | 308a5297ef1f1632ebed05c982f98ba6942345b3 /Eigen/src/Core/arch | |
parent | 14022f5eb5304e23ebe10284f07bb1387570dc5e (diff) |
Get rid of initialization logic for blueNorm by making the computed constants static const or constexpr.
Move macro definition EIGEN_CONSTEXPR to Core and make all methods in NumTraits constexpr when EIGEN_HAS_CONSTEXPR is 1.
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/Default/BFloat16.h | 41 | ||||
-rw-r--r-- | Eigen/src/Core/arch/Default/Half.h | 44 |
2 files changed, 43 insertions, 42 deletions
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 3b36c2f23..8706c992a 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -31,12 +31,12 @@ namespace bfloat16_impl { // Make our own __bfloat16_raw definition. struct __bfloat16_raw { - EIGEN_DEVICE_FUNC __bfloat16_raw() : value(0) {} - explicit EIGEN_DEVICE_FUNC __bfloat16_raw(unsigned short raw) : value(raw) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {} + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {} unsigned short value; }; -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value); template <bool AssumeArgumentIsNormalOrInfinityOrZero> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff); // Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying: @@ -48,8 +48,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h); struct bfloat16_base : public __bfloat16_raw { - EIGEN_DEVICE_FUNC bfloat16_base() {} - EIGEN_DEVICE_FUNC bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {} }; } // namespace bfloat16_impl @@ -59,15 +59,15 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw; - EIGEN_DEVICE_FUNC bfloat16() {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {} - EIGEN_DEVICE_FUNC bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const 
__bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {} - explicit EIGEN_DEVICE_FUNC bfloat16(bool b) + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b) : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {} template<class T> - explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val) : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {} explicit EIGEN_DEVICE_FUNC bfloat16(float f) @@ -76,7 +76,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { // Following the convention of numpy, converting between complex and // float will lead to loss of imag value. template<typename RealScalar> - explicit EIGEN_DEVICE_FUNC bfloat16(const std::complex<RealScalar>& val) + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val) : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {} EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless. 
@@ -272,10 +272,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const return output; } -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) { - __bfloat16_raw h; - h.value = value; - return h; +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) { + return __bfloat16_raw(value); } // float_to_bfloat16_rtne template specialization that does not make any @@ -619,20 +617,23 @@ template<> struct NumTraits<Eigen::bfloat16> RequireInitialization = false }; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() { return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() { return Eigen::bfloat16(5e-2f); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() { + return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f); + + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() { return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() { return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() { return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() { return 
bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); } }; diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index 60f19749b..5fcc81ba2 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h @@ -67,8 +67,8 @@ namespace half_impl { #if !defined(EIGEN_HAS_GPU_FP16) // Make our own __half_raw definition that is similar to CUDA's. struct __half_raw { - EIGEN_DEVICE_FUNC __half_raw() : x(0) {} - explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw() : x(0) {} + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(unsigned short raw) : x(raw) {} unsigned short x; }; #elif defined(EIGEN_HAS_HIP_FP16) @@ -85,20 +85,20 @@ typedef cl::sycl::half __half_raw; #endif -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(unsigned short x); EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff); EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h); struct half_base : public __half_raw { - EIGEN_DEVICE_FUNC half_base() {} - EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base() {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half_raw& h) : __half_raw(h) {} #if defined(EIGEN_HAS_GPU_FP16) #if defined(EIGEN_HAS_HIP_FP16) - EIGEN_DEVICE_FUNC half_base(const __half& h) { x = __half_as_ushort(h); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) { x = __half_as_ushort(h); } #elif defined(EIGEN_HAS_CUDA_FP16) #if (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000) - EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {} #endif #endif #endif @@ -125,22 +125,22 @@ struct half : public 
half_impl::half_base { #endif #endif - EIGEN_DEVICE_FUNC half() {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half() {} - EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half_raw& h) : half_impl::half_base(h) {} #if defined(EIGEN_HAS_GPU_FP16) #if defined(EIGEN_HAS_HIP_FP16) - EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {} #elif defined(EIGEN_HAS_CUDA_FP16) #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000 - EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {} #endif #endif #endif - explicit EIGEN_DEVICE_FUNC half(bool b) + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(bool b) : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {} template<class T> explicit EIGEN_DEVICE_FUNC half(const T& val) @@ -417,10 +417,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { // these in hardware. If we need more performance on older/other CPUs, they are // also possible to vectorize directly. 
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) { - __half_raw h; - h.x = x; - return h; +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(unsigned short x) { + return __half_raw(x); } union float32_bits { @@ -666,20 +664,22 @@ template<> struct NumTraits<Eigen::half> RequireInitialization = false }; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { + return half_impl::raw_uint16_to_half(0x211f); // Eigen::half(1e-2f); + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half highest() { return half_impl::raw_uint16_to_half(0x7bff); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half lowest() { return half_impl::raw_uint16_to_half(0xfbff); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half infinity() { return half_impl::raw_uint16_to_half(0x7c00); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { return half_impl::raw_uint16_to_half(0x7c01); } }; |