Special function implementations for half/bfloat16 packets.

The current implementations handle only half-float scalars and fail to consider half-float packets. Added specializations for packets on AVX, AVX512 and NEON. Added tests to `special_packetmath`. The current `special_functions` tests would fail for half and bfloat16 due to lack of precision. The NEON tests also fail because of precision issues and a different handling of `sqrt(inf)`, so the special functions bessel and ndtri have been disabled. Tested with AVX and AVX512.
author: Antonio Sanchez <cantonios@google.com> 2020-12-02 14:00:57 -0800
committer: Antonio Sanchez <cantonios@google.com> 2020-12-04 10:16:29 -0800
commit: e2f21465fea76a80966f12a20d0be36597f19b44 (patch)
tree: 1ae9b0e3ae489b028902166a343f796d196fde82 /unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
parent: 305b8bd2777bda99f65791468f305b76021bf579 (diff)
1 files changed, 34 insertions, 0 deletions
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
new file mode 100644
index 000000000..f8dda28fc
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
@@ -0,0 +1,34 @@
+#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H
+#define EIGEN_NEON_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+#ifdef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+// Specializes METHOD for Packet8hf/Packet4hf: convert to float, apply METHOD<Packet4f>, convert back to half.
+#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD)                            \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                       \
+Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { /* split in halves */ \
+  const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x)));  \
+  const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); \
+  return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); /* rejoin */ \
+}                                                                       \
+/* Packet4hf specialization: widen, apply METHOD<Packet4f>, narrow. */  \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                       \
+Packet4hf METHOD<Packet4hf>(const Packet4hf& x) {                       \
+  return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x)));               \
+}
+// Generate the half-packet specializations of perf and pndtri.
+NEON_HALF_TO_FLOAT_FUNCTIONS(perf)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri)
+// The helper macro is only needed for the two expansions above; remove it after use.
+#undef NEON_HALF_TO_FLOAT_FUNCTIONS
+#endif  // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+// Packet4bf variants of perf and pndtri. BF16_PACKET_FUNCTION is defined outside this file; presumably it routes through Packet4f - confirm.
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri)
+
+}  // namespace internal
+}  // namespace Eigen
+
+#endif  // EIGEN_NEON_SPECIALFUNCTIONS_H
author	Antonio Sanchez <cantonios@google.com>	2020-12-02 14:00:57 -0800
committer	Antonio Sanchez <cantonios@google.com>	2020-12-04 10:16:29 -0800
commit	e2f21465fea76a80966f12a20d0be36597f19b44 (patch)
tree	1ae9b0e3ae489b028902166a343f796d196fde82 /unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
parent	305b8bd2777bda99f65791468f305b76021bf579 (diff)