diff options
author | Gael Guennebaud <g.gael@free.fr> | 2017-02-17 14:39:02 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2017-02-17 14:39:02 +0100 |
commit | cbbf88c4d7bf5c7c74658ae3294e3880106b83b0 (patch) | |
tree | efe708cf452efc3250f344fc90d1fcd576b1b991 /Eigen/src/Core/arch/NEON/PacketMath.h | |
parent | 582b5e39bf5515a0277b8ecf9c40e09748dc7d98 (diff) |
Use int32_t instead of int in NEON code. Some platforms with 16 bytes int supports ARM NEON.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index d392bf3ff..84a56bdcc 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -46,7 +46,7 @@ typedef uint32x4_t Packet4ui; const Packet4f p4f_##NAME = pset1<Packet4f>(X) #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) + const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) @@ -83,7 +83,7 @@ template<> struct packet_traits<float> : default_packet_traits HasSqrt = 0 }; }; -template<> struct packet_traits<int> : default_packet_traits +template<> struct packet_traits<int32_t> : default_packet_traits { typedef Packet4i type; typedef Packet4i half; // Packet2i intrinsics not implemented yet @@ -105,11 +105,11 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); } template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { @@ -117,7 +117,7 @@ template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) +template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) { const int32_t i[] = {0, 1, 2, 3}; Packet4i countdown = vld1q_s32(i); @@ -240,20 +240,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { float32x2_t lo, hi; lo = vld1_dup_f32(from); hi = vld1_dup_f32(from+1); return vcombine_f32(lo, hi); } -template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) +template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) { int32x2_t lo, hi; lo = vld1_dup_s32(from); @@ -261,11 +261,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) return vcombine_s32(lo, hi); } -template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) { @@ -276,7 +276,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) { Packet4i res = pset1<Packet4i>(0); res = vsetq_lane_s32(from[0*stride], res, 0); @@ -293,7 +293,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); @@ -301,12 +301,12 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const to[stride*3] = vgetq_lane_s32(from, 3); } -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); } // FIXME only store the 2 first elements ? -template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { float32x2_t a_lo, a_hi; @@ -361,7 +361,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) return sum; } -template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, sum; @@ -408,7 +408,7 @@ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) return vget_lane_f32(prod, 0); } -template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, prod; @@ -436,7 +436,7 @@ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) return vget_lane_f32(min, 0); } -template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, min; @@ -461,7 +461,7 @@ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) return vget_lane_f32(max, 0); } -template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, max; |