aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2020-12-02 14:00:57 -0800
committerGravatar Antonio Sanchez <cantonios@google.com>2020-12-04 10:16:29 -0800
commite2f21465fea76a80966f12a20d0be36597f19b44 (patch)
tree1ae9b0e3ae489b028902166a343f796d196fde82 /unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
parent305b8bd2777bda99f65791468f305b76021bf579 (diff)
Special function implementations for half/bfloat16 packets.
Current implementations fail to consider half-float packets, only half-float scalars. Added specializations for packets on AVX, AVX512 and NEON. Added tests to `special_packetmath`. The current `special_functions` tests would fail for half and bfloat16 due to lack of precision. The NEON tests also fail with precision issues and due to different handling of `sqrt(inf)`, so special functions bessel, ndtri have been disabled. Tested with AVX, AVX512.
Diffstat (limited to 'unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h')
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h34
1 files changed, 34 insertions, 0 deletions
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
new file mode 100644
index 000000000..f8dda28fc
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
@@ -0,0 +1,34 @@
+#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H
+#define EIGEN_NEON_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+#ifdef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { \
+ const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x))); \
+ const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); \
+ return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); \
+} \
+ \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet4hf METHOD<Packet4hf>(const Packet4hf& x) { \
+ return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x))); \
+}
+
+NEON_HALF_TO_FLOAT_FUNCTIONS(perf)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri)
+
+#undef NEON_HALF_TO_FLOAT_FUNCTIONS
+#endif
+
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_NEON_SPECIALFUNCTIONS_H