aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2013-10-31 13:52:43 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2013-10-31 13:52:43 +0100
commit6dc0e59b1e320660acd9a3d930473170b743454a (patch)
tree19552e88fb38f448dd7d2a4995175b70a35ab277 /Eigen/src/Core/arch
parent2702788da71303afa71e53dfa6542a560115425a (diff)
Fix bug #677: compilation issue on arm64 which does not have the PLD instruction
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h20
1 files changed, 15 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 163bac215..05e891df2 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -48,9 +48,19 @@ typedef uint32x4_t Packet4ui;
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
-
-#ifndef __pld
-#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
+
+
+// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
+// which available on LLVM and GCC (at least)
+#if (defined(__has_builtin) && __has_builtin(__builtin_prefetch)) || defined(__GNUC__)
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif !defined(__aarch64__)
+ #define EIGEN_ARM_PREFETCH(ADDR) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#else
+ // by default no explicit prefetching
+ #define EIGEN_ARM_PREFETCH(ADDR)
#endif
template<> struct packet_traits<float> : default_packet_traits
@@ -209,8 +219,8 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }