aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/NEON
diff options
context:
space:
mode:
authorGravatar Marton Danoczy <marton78@gmail.com>2011-11-04 16:37:10 +0100
committerGravatar Marton Danoczy <marton78@gmail.com>2011-11-04 16:37:10 +0100
commitf422668d39c51a7281e55404cfed4a262747eb51 (patch)
treed4b3a45d6ba622317ff2ec43afd39cd0007f4a19 /Eigen/src/Core/arch/NEON
parent1b98b7347248b959a76157a1da16ce70253fb597 (diff)
Patches to support ARM NEON with Clang 3.0 and LLVM-GCC
Diffstat (limited to 'Eigen/src/Core/arch/NEON')
-rw-r--r--Eigen/src/Core/arch/NEON/Complex.h4
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h58
2 files changed, 38 insertions, 24 deletions
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 8e55548c9..212887184 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -27,8 +27,8 @@
namespace internal {
-static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
-static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
+static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
+static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
//---------- float ----------
struct Packet2cf
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 478ef8038..6c7cd1590 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -52,6 +52,16 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#if defined(__llvm__) && !defined(__clang__)
+ //Special treatment for Apple's llvm-gcc, its NEON packet types are unions
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
+#else
+ //Default initializer for packets
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
+#endif
+
#ifndef __pld
#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
#endif
@@ -84,7 +94,7 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
-#if EIGEN_GNUC_AT_MOST(4,4)
+#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
// workaround gcc 4.2, 4.3 and 4.4 compilatin issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{
- Packet4f countdown = { 0, 1, 2, 3 };
+ Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{
- Packet4i countdown = { 0, 1, 2, 3 };
+ Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
return vaddq_s32(pset1<Packet4i>(a), countdown);
}
@@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
return s[0];
}
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
- {
- if (Offset!=0)
- first = vextq_f32(first, second, Offset);
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
- EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
- {
- if (Offset!=0)
- first = vextq_s32(first, second, Offset);
- }
-};
+// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+
+#undef PALIGN_NEON
} // end namespace internal