diff options
author | Antonio Sanchez <cantonios@google.com> | 2020-06-19 11:34:47 -0700 |
---|---|---|
committer | Antonio Sanchez <cantonios@google.com> | 2020-06-22 11:24:43 -0700 |
commit | ff4e7a0820365ee4b98cce71b7061009975b60f3 (patch) | |
tree | 377905344cba1616e2da0aa8630046f328debd05 /Eigen/src/Core/arch/NEON/PacketMath.h | |
parent | 03ebdf6acbad3eb7d272311522ba6414c47a6ef2 (diff) |
Add missing Packet2l/Packet2ul ops for NEON.
The current multiply (`pmul`) and comparison operators (`pcmp_lt`,
`pcmp_le`, `pcmp_eq`) are missing for packets `Packet2l` and
`Packet2ul`. This leads to compile errors for the `packetmath.cpp` tests
in clang. Here we add and test the missing ops.
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_9=1' '-DEIGEN_TEST_PART_10=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
$ arm-linux-gnueabihf-g++ -mfpu=neon -static -I./ '-DEIGEN_TEST_PART_9=1' '-DEIGEN_TEST_PART_10=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
$ clang++ -target aarch64-linux-android21 -static -I./ '-DEIGEN_TEST_PART_9=1' '-DEIGEN_TEST_PART_10=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
$ clang++ -target armv7-linux-android21 -static -mfpu=neon -I./ '-DEIGEN_TEST_PART_9=1' '-DEIGEN_TEST_PART_10=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 72 |
1 files changed, 71 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 340e1b1c5..80ccd261b 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -389,7 +389,7 @@ struct packet_traits<uint64_t> : default_packet_traits }; #if EIGEN_GNUC_AT_MOST(4, 4) && !EIGEN_COMP_LLVM -// workaround gcc 4.2, 4.3 and 4.4 compilatin issue +// workaround gcc 4.2, 4.3 and 4.4 compilation issue EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_f32(const float* x) { return ::vld1_f32 ((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32(const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); } @@ -867,6 +867,16 @@ template<> EIGEN_STRONG_INLINE Packet2i pmul<Packet2i>(const Packet2i& a, const template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } template<> EIGEN_STRONG_INLINE Packet2ui pmul<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vmul_u32(a,b); } template<> EIGEN_STRONG_INLINE Packet4ui pmul<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vmulq_u32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2l pmul<Packet2l>(const Packet2l& a, const Packet2l& b) { + return vcombine_s64( + vdup_n_s64(vgetq_lane_s64(a, 0)*vgetq_lane_s64(b, 0)), + vdup_n_s64(vgetq_lane_s64(a, 1)*vgetq_lane_s64(b, 1))); +} +template<> EIGEN_STRONG_INLINE Packet2ul pmul<Packet2ul>(const Packet2ul& a, const Packet2ul& b) { + return vcombine_u64( + vdup_n_u64(vgetq_lane_u64(a, 0)*vgetq_lane_u64(b, 0)), + vdup_n_u64(vgetq_lane_u64(a, 1)*vgetq_lane_u64(b, 1))); +} template<> EIGEN_STRONG_INLINE Packet2f pdiv<Packet2f>(const Packet2f& a, const Packet2f& b) { @@ -1233,6 +1243,26 @@ template<> EIGEN_STRONG_INLINE Packet2ui pcmp_le<Packet2ui>(const Packet2ui& a, { return vcle_u32(a,b); } template<> EIGEN_STRONG_INLINE Packet4ui pcmp_le<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vcleq_u32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2l pcmp_le<Packet2l>(const Packet2l& a, const Packet2l& b) +{ +#if EIGEN_ARCH_ARM64 + return vreinterpretq_s64_u64(vcleq_s64(a,b)); +#else + return vcombine_s64( + vdup_n_s64(vgetq_lane_s64(a, 0) <= vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0), + vdup_n_s64(vgetq_lane_s64(a, 1) <= vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0)); +#endif +} +template<> EIGEN_STRONG_INLINE Packet2ul pcmp_le<Packet2ul>(const Packet2ul& a, const Packet2ul& b) +{ +#if EIGEN_ARCH_ARM64 + return vcleq_u64(a,b); +#else + return vcombine_u64( + vdup_n_u64(vgetq_lane_u64(a, 0) <= vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0), + vdup_n_u64(vgetq_lane_u64(a, 1) <= vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0)); +#endif +} template<> EIGEN_STRONG_INLINE Packet2f pcmp_lt<Packet2f>(const Packet2f& a, const Packet2f& b) { return vreinterpret_f32_u32(vclt_f32(a,b)); } @@ -1274,6 +1304,26 @@ template<> EIGEN_STRONG_INLINE Packet2ui pcmp_lt<Packet2ui>(const Packet2ui& a, { return vclt_u32(a,b); } template<> EIGEN_STRONG_INLINE Packet4ui pcmp_lt<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vcltq_u32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2l pcmp_lt<Packet2l>(const Packet2l& a, const Packet2l& b) +{ +#if EIGEN_ARCH_ARM64 + return vreinterpretq_s64_u64(vcltq_s64(a,b)); +#else + return vcombine_s64( + vdup_n_s64(vgetq_lane_s64(a, 0) < vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0), + vdup_n_s64(vgetq_lane_s64(a, 1) < vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0)); +#endif +} +template<> EIGEN_STRONG_INLINE Packet2ul pcmp_lt<Packet2ul>(const Packet2ul& a, const Packet2ul& b) +{ +#if EIGEN_ARCH_ARM64 + return vcltq_u64(a,b); +#else + return vcombine_u64( + vdup_n_u64(vgetq_lane_u64(a, 0) < vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0), + vdup_n_u64(vgetq_lane_u64(a, 1) < vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0)); +#endif +} template<> EIGEN_STRONG_INLINE Packet2f pcmp_eq<Packet2f>(const Packet2f& a, const Packet2f& b) { return vreinterpret_f32_u32(vceq_f32(a,b)); } @@ -1315,6 +1365,26 @@ template<> EIGEN_STRONG_INLINE Packet2ui pcmp_eq<Packet2ui>(const Packet2ui& a, { return vceq_u32(a,b); } template<> EIGEN_STRONG_INLINE Packet4ui pcmp_eq<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vceqq_u32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2l pcmp_eq<Packet2l>(const Packet2l& a, const Packet2l& b) +{ +#if EIGEN_ARCH_ARM64 + return vreinterpretq_s64_u64(vceqq_s64(a,b)); +#else + return vcombine_s64( + vdup_n_s64(vgetq_lane_s64(a, 0) == vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0), + vdup_n_s64(vgetq_lane_s64(a, 1) == vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0)); +#endif +} +template<> EIGEN_STRONG_INLINE Packet2ul pcmp_eq<Packet2ul>(const Packet2ul& a, const Packet2ul& b) +{ +#if EIGEN_ARCH_ARM64 + return vceqq_u64(a,b); +#else + return vcombine_u64( + vdup_n_u64(vgetq_lane_u64(a, 0) == vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0), + vdup_n_u64(vgetq_lane_u64(a, 1) == vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0)); +#endif +} template<> EIGEN_STRONG_INLINE Packet2f pcmp_lt_or_nan<Packet2f>(const Packet2f& a, const Packet2f& b) { return vreinterpret_f32_u32(vmvn_u32(vcge_f32(a,b))); } |