diff options
author | Gael Guennebaud <g.gael@free.fr> | 2013-10-31 13:52:43 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2013-10-31 13:52:43 +0100 |
commit | 6dc0e59b1e320660acd9a3d930473170b743454a (patch) | |
tree | 19552e88fb38f448dd7d2a4995175b70a35ab277 /Eigen/src/Core | |
parent | 2702788da71303afa71e53dfa6542a560115425a (diff) |
Fix bug #677: compilation issue on arm64 which does not have the PLD instruction
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 20 |
1 files changed, 15 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 163bac215..05e891df2 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -48,9 +48,19 @@ typedef uint32x4_t Packet4ui; #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} #endif - -#ifndef __pld -#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); + + +// arm64 does not have the pld instruction. If available, let's trust the __builtin_prefetch built-in function +// which is available on LLVM and GCC (at least) +#if (defined(__has_builtin) && __has_builtin(__builtin_prefetch)) || defined(__GNUC__) + #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#elif defined __pld + #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) +#elif !defined(__aarch64__) + #define EIGEN_ARM_PREFETCH(ADDR) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#else + // by default no explicit prefetching + #define EIGEN_ARM_PREFETCH(ADDR) #endif template<> struct packet_traits<float> : default_packet_traits @@ -209,8 +219,8 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); } // FIXME only store the 2 first elements ? 
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } |