diff options
author | Antonio Sanchez <cantonios@google.com> | 2020-12-02 14:00:57 -0800 |
---|---|---|
committer | Antonio Sanchez <cantonios@google.com> | 2020-12-04 10:16:29 -0800 |
commit | e2f21465fea76a80966f12a20d0be36597f19b44 (patch) | |
tree | 1ae9b0e3ae489b028902166a343f796d196fde82 /unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h | |
parent | 305b8bd2777bda99f65791468f305b76021bf579 (diff) |
Special function implementations for half/bfloat16 packets.
Current implementations fail to consider half-float packets, only
half-float scalars. Added specializations for packets on AVX, AVX512 and
NEON. Added tests to `special_packetmath`.
The current `special_functions` tests would fail for half and bfloat16 due to
lack of precision. The NEON tests also fail with precision issues and
due to different handling of `sqrt(inf)`, so special functions bessel, ndtri
have been disabled.
Tested with AVX, AVX512.
Diffstat (limited to 'unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h')
-rw-r--r-- | unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h new file mode 100644 index 000000000..f8dda28fc --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h @@ -0,0 +1,34 @@ +#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H +#define EIGEN_NEON_SPECIALFUNCTIONS_H + +namespace Eigen { +namespace internal { + +#ifdef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC + +#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { \ + const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x))); \ + const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); \ + return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); \ +} \ + \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet4hf METHOD<Packet4hf>(const Packet4hf& x) { \ + return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x))); \ +} + +NEON_HALF_TO_FLOAT_FUNCTIONS(perf) +NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri) + +#undef NEON_HALF_TO_FLOAT_FUNCTIONS +#endif + +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_NEON_SPECIALFUNCTIONS_H |