about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Pedro Caldeira <pedro.caldeira@ibm.com> 2020-05-11 16:38:56 -0300
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-11 21:04:51 +0000
commit: 5fdc1792410f7c2a0aa751ed7cabc013026aef26 (patch)
tree: 1ff99dcaa9cb329c702a9d9268e4d2758c9074ee
parent: d3e81db6c517e8c531cf9b32b818c838b5398e83 (diff)
Altivec template functions to better code reusability
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 728
1 files changed, 257 insertions, 471 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 83b75b974..d12aa2b10 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -75,6 +75,7 @@ typedef __vector uint8_t Packet16uc;
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
+#define __UNPACK_TYPE__(PACKETNAME) typename unpacket_traits<PACKETNAME>::type
// These constants are endian-agnostic
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
@@ -375,8 +376,8 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
return s;
}
-// Need to define them first or we get specialization after instantiation errors
-template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+template <typename Packet>
+EIGEN_STRONG_INLINE Packet pload_common(const __UNPACK_TYPE__(Packet)* from)
{
// some versions of GCC throw "unused-but-set-parameter".
// ignoring these warnings for now.
@@ -389,57 +390,39 @@ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
#endif
}
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+ return pload_common<Packet4f>(from);
+}
+
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
- // some versions of GCC throw "unused-but-set-parameter".
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(from);
- EIGEN_DEBUG_ALIGNED_LOAD
-#ifdef __VSX__
- return vec_xl(0, from);
-#else
- return vec_ld(0, from);
-#endif
+ return pload_common<Packet4i>(from);
}
template<> EIGEN_STRONG_INLINE Packet8s pload<Packet8s>(const short int* from)
{
- // some versions of GCC throw "unused-but-set-parameter".
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(from);
- EIGEN_DEBUG_ALIGNED_LOAD
- return vec_ld(0, from);
+ return pload_common<Packet8s>(from);
}
template<> EIGEN_STRONG_INLINE Packet8us pload<Packet8us>(const unsigned short int* from)
{
- // some versions of GCC throw "unused-but-set-parameter".
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(from);
- EIGEN_DEBUG_ALIGNED_LOAD
- return vec_ld(0, from);
+ return pload_common<Packet8us>(from);
}
template<> EIGEN_STRONG_INLINE Packet16c pload<Packet16c>(const int8_t* from)
{
- // some versions of GCC throw "unused-but-set-parameter".
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(from);
- EIGEN_DEBUG_ALIGNED_LOAD
- return vec_ld(0, from);
+ return pload_common<Packet16c>(from);
}
template<> EIGEN_STRONG_INLINE Packet16uc pload<Packet16uc>(const uint8_t* from)
{
- // some versions of GCC throw "unused-but-set-parameter".
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(from);
- EIGEN_DEBUG_ALIGNED_LOAD
- return vec_ld(0, from);
+ return pload_common<Packet16uc>(from);
}
-template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
-{
+template <typename Packet>
+EIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet& from){
// some versions of GCC throw "unused-but-set-parameter" (float *to).
// ignoring these warnings for now.
EIGEN_UNUSED_VARIABLE(to);
@@ -448,247 +431,232 @@ template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& f
vec_xst(from, 0, to);
#else
vec_st(from, 0, to);
-#endif
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
+{
+ pstore_common<Packet4f>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
- // some versions of GCC throw "unused-but-set-parameter" (float *to).
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(to);
- EIGEN_DEBUG_ALIGNED_STORE
-#ifdef __VSX__
- vec_xst(from, 0, to);
-#else
- vec_st(from, 0, to);
-#endif
+ pstore_common<Packet4i>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<short int>(short int* to, const Packet8s& from)
{
- // some versions of GCC throw "unused-but-set-parameter" (float *to).
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(to);
- EIGEN_DEBUG_ALIGNED_STORE
- vec_st(from, 0, to);
+ pstore_common<Packet8s>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<unsigned short int>(unsigned short int* to, const Packet8us& from)
{
- // some versions of GCC throw "unused-but-set-parameter" (float *to).
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(to);
- EIGEN_DEBUG_ALIGNED_STORE
- vec_st(from, 0, to);
+ pstore_common<Packet8us>(to, from);
}
+
template<> EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet16c& from)
{
- // some versions of GCC throw "unused-but-set-parameter" (float *to).
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(to);
- EIGEN_DEBUG_ALIGNED_STORE
- vec_st(from, 0, to);
+ pstore_common<Packet16c>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet16uc& from)
{
- // some versions of GCC throw "unused-but-set-parameter" (float *to).
- // ignoring these warnings for now.
- EIGEN_UNUSED_VARIABLE(to);
- EIGEN_DEBUG_ALIGNED_STORE
- vec_st(from, 0, to);
+ pstore_common<Packet16uc>(to, from);
}
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
- Packet4f v = {from, from, from, from};
+template<typename Packet>
+EIGEN_STRONG_INLINE Packet pset1_size4(const __UNPACK_TYPE__(Packet)& from)
+{
+ Packet v = {from, from, from, from};
return v;
}
-template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
- Packet4i v = {from, from, from, from};
+template<typename Packet>
+EIGEN_STRONG_INLINE Packet pset1_size8(const __UNPACK_TYPE__(Packet)& from)
+{
+ Packet v = {from, from, from, from, from, from, from, from};
return v;
}
-template<> EIGEN_STRONG_INLINE Packet8s pset1<Packet8s>(const short int& from) {
- Packet8s v = {from, from, from, from, from, from, from, from};
+template<typename Packet>
+EIGEN_STRONG_INLINE Packet pset1_size16(const __UNPACK_TYPE__(Packet)& from)
+{
+ Packet v = {from, from, from, from, from, from, from, from, from, from, from, from, from, from, from, from};
return v;
}
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
+ return pset1_size4<Packet4f>(from);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
+ return pset1_size4<Packet4i>(from);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8s pset1<Packet8s>(const short int& from) {
+ return pset1_size8<Packet8s>(from);
+}
+
template<> EIGEN_STRONG_INLINE Packet8us pset1<Packet8us>(const unsigned short int& from) {
- Packet8us v = {from, from, from, from, from, from, from, from};
- return v;
+ return pset1_size8<Packet8us>(from);
}
template<> EIGEN_STRONG_INLINE Packet16c pset1<Packet16c>(const int8_t& from) {
- Packet16c v = {from, from, from, from, from, from, from, from, from, from, from, from, from, from, from, from};
- return v;
+ return pset1_size16<Packet16c>(from);
}
template<> EIGEN_STRONG_INLINE Packet16uc pset1<Packet16uc>(const uint8_t& from) {
- Packet16uc v = {from, from, from, from, from, from, from, from, from, from, from, from, from, from, from, from};
- return v;
+ return pset1_size16<Packet16uc>(from);
}
template<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) {
return reinterpret_cast<Packet4f>(pset1<Packet4i>(from));
}
-template<> EIGEN_STRONG_INLINE void
-pbroadcast4<Packet4f>(const float *a,
- Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+template<typename Packet> EIGEN_STRONG_INLINE void
+pbroadcast4_common(const __UNPACK_TYPE__(Packet) *a,
+ Packet& a0, Packet& a1, Packet& a2, Packet& a3)
{
- a3 = pload<Packet4f>(a);
+ a3 = pload<Packet>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ pbroadcast4_common<Packet4f>(a, a0, a1, a2, a3);
+}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
{
- a3 = pload<Packet4i>(a);
- a0 = vec_splat(a3, 0);
- a1 = vec_splat(a3, 1);
- a2 = vec_splat(a3, 2);
- a3 = vec_splat(a3, 3);
+ pbroadcast4_common<Packet4i>(a, a0, a1, a2, a3);
+}
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_common(const __UNPACK_TYPE__(Packet)* from, Index stride)
+{
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];
+ a[0] = from[0*stride];
+ a[1] = from[1*stride];
+ a[2] = from[2*stride];
+ a[3] = from[3*stride];
+ return pload<Packet>(a);
}
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
{
- EIGEN_ALIGN16 float af[4];
- af[0] = from[0*stride];
- af[1] = from[1*stride];
- af[2] = from[2*stride];
- af[3] = from[3*stride];
- return pload<Packet4f>(af);
+ return pgather_common<Packet4f>(from, stride);
}
+
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
{
- EIGEN_ALIGN16 int ai[4];
- ai[0] = from[0*stride];
- ai[1] = from[1*stride];
- ai[2] = from[2*stride];
- ai[3] = from[3*stride];
- return pload<Packet4i>(ai);
+ return pgather_common<Packet4i>(from, stride);
}
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_size8(const __UNPACK_TYPE__(Packet)* from, Index stride)
+{
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8];
+ a[0] = from[0*stride];
+ a[1] = from[1*stride];
+ a[2] = from[2*stride];
+ a[3] = from[3*stride];
+ a[4] = from[4*stride];
+ a[5] = from[5*stride];
+ a[6] = from[6*stride];
+ a[7] = from[7*stride];
+ return pload<Packet>(a);
+}
+
template<> EIGEN_DEVICE_FUNC inline Packet8s pgather<short int, Packet8s>(const short int* from, Index stride)
{
- EIGEN_ALIGN16 short int ai[8];
- ai[0] = from[0*stride];
- ai[1] = from[1*stride];
- ai[2] = from[2*stride];
- ai[3] = from[3*stride];
- ai[4] = from[4*stride];
- ai[5] = from[5*stride];
- ai[6] = from[6*stride];
- ai[7] = from[7*stride];
- return pload<Packet8s>(ai);
+ return pgather_size8<Packet8s>(from, stride);
}
template<> EIGEN_DEVICE_FUNC inline Packet8us pgather<unsigned short int, Packet8us>(const unsigned short int* from, Index stride)
{
- EIGEN_ALIGN16 unsigned short int ai[8];
- ai[0] = from[0*stride];
- ai[1] = from[1*stride];
- ai[2] = from[2*stride];
- ai[3] = from[3*stride];
- ai[4] = from[4*stride];
- ai[5] = from[5*stride];
- ai[6] = from[6*stride];
- ai[7] = from[7*stride];
- return pload<Packet8us>(ai);
+ return pgather_size8<Packet8us>(from, stride);
+}
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_size16(const __UNPACK_TYPE__(Packet)* from, Index stride)
+{
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16];
+ a[0] = from[0*stride];
+ a[1] = from[1*stride];
+ a[2] = from[2*stride];
+ a[3] = from[3*stride];
+ a[4] = from[4*stride];
+ a[5] = from[5*stride];
+ a[6] = from[6*stride];
+ a[7] = from[7*stride];
+ a[8] = from[8*stride];
+ a[9] = from[9*stride];
+ a[10] = from[10*stride];
+ a[11] = from[11*stride];
+ a[12] = from[12*stride];
+ a[13] = from[13*stride];
+ a[14] = from[14*stride];
+ a[15] = from[15*stride];
+ return pload<Packet>(a);
}
+
template<> EIGEN_DEVICE_FUNC inline Packet16c pgather<int8_t, Packet16c>(const int8_t* from, Index stride)
{
- EIGEN_ALIGN16 int8_t ai[16];
- ai[0] = from[0*stride];
- ai[1] = from[1*stride];
- ai[2] = from[2*stride];
- ai[3] = from[3*stride];
- ai[4] = from[4*stride];
- ai[5] = from[5*stride];
- ai[6] = from[6*stride];
- ai[7] = from[7*stride];
- ai[8] = from[8*stride];
- ai[9] = from[9*stride];
- ai[10] = from[10*stride];
- ai[11] = from[11*stride];
- ai[12] = from[12*stride];
- ai[13] = from[13*stride];
- ai[14] = from[14*stride];
- ai[15] = from[15*stride];
- return pload<Packet16c>(ai);
+ return pgather_size16<Packet16c>(from, stride);
}
template<> EIGEN_DEVICE_FUNC inline Packet16uc pgather<uint8_t, Packet16uc>(const uint8_t* from, Index stride)
{
- EIGEN_ALIGN16 uint8_t ai[16];
- ai[0] = from[0*stride];
- ai[1] = from[1*stride];
- ai[2] = from[2*stride];
- ai[3] = from[3*stride];
- ai[4] = from[4*stride];
- ai[5] = from[5*stride];
- ai[6] = from[6*stride];
- ai[7] = from[7*stride];
- ai[8] = from[8*stride];
- ai[9] = from[9*stride];
- ai[10] = from[10*stride];
- ai[11] = from[11*stride];
- ai[12] = from[12*stride];
- ai[13] = from[13*stride];
- ai[14] = from[14*stride];
- ai[15] = from[15*stride];
- return pload<Packet16uc>(ai);
+ return pgather_size16<Packet16uc>(from, stride);
+}
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size4(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)
+{
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];
+ pstore<__UNPACK_TYPE__(Packet)>(a, from);
+ to[0*stride] = a[0];
+ to[1*stride] = a[1];
+ to[2*stride] = a[2];
+ to[3*stride] = a[3];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
{
- EIGEN_ALIGN16 float af[4];
- pstore<float>(af, from);
- to[0*stride] = af[0];
- to[1*stride] = af[1];
- to[2*stride] = af[2];
- to[3*stride] = af[3];
+ pscatter_size4<Packet4f>(to, from, stride);
}
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
{
- EIGEN_ALIGN16 int ai[4];
- pstore<int>((int *)ai, from);
- to[0*stride] = ai[0];
- to[1*stride] = ai[1];
- to[2*stride] = ai[2];
- to[3*stride] = ai[3];
+ pscatter_size4<Packet4i>(to, from, stride);
+}
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size8(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)
+{
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8];
+ pstore<__UNPACK_TYPE__(Packet)>(a, from);
+ to[0*stride] = a[0];
+ to[1*stride] = a[1];
+ to[2*stride] = a[2];
+ to[3*stride] = a[3];
+ to[4*stride] = a[4];
+ to[5*stride] = a[5];
+ to[6*stride] = a[6];
+ to[7*stride] = a[7];
}
+
template<> EIGEN_DEVICE_FUNC inline void pscatter<short int, Packet8s>(short int* to, const Packet8s& from, Index stride)
{
- EIGEN_ALIGN16 short int ai[8];
- pstore<short int>((short int *)ai, from);
- to[0*stride] = ai[0];
- to[1*stride] = ai[1];
- to[2*stride] = ai[2];
- to[3*stride] = ai[3];
- to[4*stride] = ai[4];
- to[5*stride] = ai[5];
- to[6*stride] = ai[6];
- to[7*stride] = ai[7];
+ pscatter_size8<Packet8s>(to, from, stride);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<unsigned short int, Packet8us>(unsigned short int* to, const Packet8us& from, Index stride)
{
- EIGEN_ALIGN16 unsigned short int ai[8];
- pstore<unsigned short int>((unsigned short int *)ai, from);
- to[0*stride] = ai[0];
- to[1*stride] = ai[1];
- to[2*stride] = ai[2];
- to[3*stride] = ai[3];
- to[4*stride] = ai[4];
- to[5*stride] = ai[5];
- to[6*stride] = ai[6];
- to[7*stride] = ai[7];
+ pscatter_size8<Packet8us>(to, from, stride);
}
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
@@ -819,122 +787,62 @@ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
-#ifdef _BIG_ENDIAN
-template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
-{
- EIGEN_DEBUG_ALIGNED_LOAD
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
- return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
-
-}
-template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
-{
- EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
- return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
-}
-template<> EIGEN_STRONG_INLINE Packet8s ploadu<Packet8s>(const short int* from)
+template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
- return static_cast<Packet8s>(vec_perm(MSQ, LSQ, mask)); // align the data
-}
-template<> EIGEN_STRONG_INLINE Packet8us ploadu<Packet8us>(const unsigned short int* from)
-{
- EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+#ifdef _BIG_ENDIAN
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
mask = vec_lvsl(0, from); // create the permute mask
- return static_cast<Packet8us>(vec_perm(MSQ, LSQ, mask)); // align the data
-}
-template<> EIGEN_STRONG_INLINE Packet16c ploadu<Packet16c>(const char* from)
-{
- EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, from); // most significant quadword
- LSQ = vec_ld(15, from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
- return static_cast<Packet16c>(vec_perm(MSQ, LSQ, mask)); // align the data
-}
-
-template<> EIGEN_STRONG_INLINE Packet16uc ploadu<Packet16uc>(const unsigned char* from)
-{
- EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, from); // most significant quadword
- LSQ = vec_ld(15, from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
- return static_cast<Packet16uc>(vec_perm(MSQ, LSQ, mask)); // align the data
-}
+ //TODO: Add static_cast here
+ return (Packet) vec_perm(MSQ, LSQ, mask); // align the data
#else
-// We also need to redefine little endian loading of Packet4i/Packet4f using VSX
-template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
-{
EIGEN_DEBUG_UNALIGNED_LOAD
return vec_xl(0, from);
+#endif
}
+
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
- EIGEN_DEBUG_UNALIGNED_LOAD
- return vec_xl(0, from);
+ return ploadu_common<Packet4f>(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ return ploadu_common<Packet4i>(from);
}
template<> EIGEN_STRONG_INLINE Packet8s ploadu<Packet8s>(const short int* from)
{
- EIGEN_DEBUG_UNALIGNED_LOAD
- return vec_vsx_ld(0, from);
+ return ploadu_common<Packet8s>(from);
}
template<> EIGEN_STRONG_INLINE Packet8us ploadu<Packet8us>(const unsigned short int* from)
{
- EIGEN_DEBUG_UNALIGNED_LOAD
- return vec_vsx_ld(0, from);
+ return ploadu_common<Packet8us>(from);
}
template<> EIGEN_STRONG_INLINE Packet16c ploadu<Packet16c>(const int8_t* from)
{
- EIGEN_DEBUG_UNALIGNED_LOAD
- return vec_vsx_ld(0, from);
+ return ploadu_common<Packet16c>(from);
}
template<> EIGEN_STRONG_INLINE Packet16uc ploadu<Packet16uc>(const uint8_t* from)
{
- EIGEN_DEBUG_UNALIGNED_LOAD
- return vec_vsx_ld(0, from);
+ return ploadu_common<Packet16uc>(from);
}
-#endif
-template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+template<typename Packet> EIGEN_STRONG_INLINE Packet ploaddup_common(const __UNPACK_TYPE__(Packet)* from)
{
- Packet4f p;
- if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
- else p = ploadu<Packet4f>(from);
+ Packet p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet>(from);
+ else p = ploadu<Packet>(from);
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
-
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return ploaddup_common<Packet4f>(from);
+}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
- Packet4i p;
- if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
- else p = ploadu<Packet4i>(from);
- return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+ return ploaddup_common<Packet4i>(from);
}
template<> EIGEN_STRONG_INLINE Packet8s ploaddup<Packet8s>(const short int* from)
@@ -985,10 +893,10 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const uint8_t*
return vec_perm(p, p, p16uc_DUPLICATE8_HI);
}
-#ifdef _BIG_ENDIAN
-template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
+#ifdef _BIG_ENDIAN
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
// Warning: not thread safe!
Packet16uc MSQ, LSQ, edges;
@@ -1002,140 +910,34 @@ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& f
MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ)
LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
- vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
+#else
+ vec_xst(from, 0, to);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ pstoreu_common<Packet4f>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{
- EIGEN_DEBUG_UNALIGNED_STORE
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- // Warning: not thread safe!
- Packet16uc MSQ, LSQ, edges;
- Packet16uc edgeAlign, align;
-
- MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
- edgeAlign = vec_lvsl(0, to); // permute map to extract edges
- edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
- align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
- vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
- vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
+ pstoreu_common<Packet4i>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<short int>(short int* to, const Packet8s& from)
{
- EIGEN_DEBUG_UNALIGNED_STORE
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- // Warning: not thread safe!
- Packet16uc MSQ, LSQ, edges;
- Packet16uc edgeAlign, align;
-
- MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
- edgeAlign = vec_lvsl(0, to); // permute map to extract edges
- edges = vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
- align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
- vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
- vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
+ pstoreu_common<Packet8s>(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<unsigned short int>(unsigned short int* to, const Packet8us& from)
{
- EIGEN_DEBUG_UNALIGNED_STORE
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- // Warning: not thread safe!
- Packet16uc MSQ, LSQ, edges;
- Packet16uc edgeAlign, align;
-
- MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
- LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
- edgeAlign = vec_lvsl(0, to); // permute map to extract edges
- edges = vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
- align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
- vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
- vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
-}
-
-template<> EIGEN_STRONG_INLINE void pstoreu<char>(char* to, const Packet16c& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- // Warning: not thread safe!
- Packet16uc MSQ, LSQ, edges;
- Packet16uc edgeAlign, align;
-
- MSQ = vec_ld(0, to); // most significant quadword
- LSQ = vec_ld(15,to); // least significant quadword
- edgeAlign = vec_lvsl(0, to); // permute map to extract edges
- edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
- align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
- vec_st( LSQ, 15, to ); // Store the LSQ part first
- vec_st( MSQ, 0, to ); // Store the MSQ part
-}
-template<> EIGEN_STRONG_INLINE void pstoreu<unsigned char>(unsigned char* to, const Packet16uc& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
- // Warning: not thread safe!
- Packet16uc MSQ, LSQ, edges;
- Packet16uc edgeAlign, align;
-
- MSQ = vec_ld(0, to); // most significant quadword
- LSQ = vec_ld(15,to); // least significant quadword
- edgeAlign = vec_lvsl(0, to); // permute map to extract edges
- edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
- align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
- vec_st( LSQ, 15, to ); // Store the LSQ part first
- vec_st( MSQ, 0, to ); // Store the MSQ part
-}
-#else
-// We also need to redefine little endian loading of Packet4i/Packet4f using VSX
-template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- vec_xst(from, 0, to);
-}
-template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- vec_xst(from, 0, to);
-}
-template<> EIGEN_STRONG_INLINE void pstoreu<short int>(short int* to, const Packet8s& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- /*GCC provides a commonly used synonym for vec_xst called vec_vsx_st.
- * Although these have the same behavior,
- * only vec_xst is guaranteed to be portable across compliant compilers
- * vec_xst should be preferred. */
- vec_xst(from, 0, to);
-}
-template<> EIGEN_STRONG_INLINE void pstoreu<unsigned short int>(unsigned short int* to, const Packet8us& from)
-{
- EIGEN_DEBUG_UNALIGNED_STORE
- /*GCC provides a commonly used synonym for vec_xst called vec_vsx_st.
- * Although these have the same behavior,
- * only vec_xst is guaranteed to be portable across compliant compilers
- * vec_xst should be preferred. */
- vec_xst(from, 0, to);
+ pstoreu_common<Packet8us>(to, from);
}
-template<> EIGEN_STRONG_INLINE void pstoreu<int8_t>(int8_t* to, const Packet16c& from)
+template<> EIGEN_STRONG_INLINE void pstoreu<int8_t>(int8_t* to, const Packet16c& from)
{
- EIGEN_DEBUG_UNALIGNED_STORE
- vec_vsx_st(from, 0, to);
+ pstoreu_common<Packet16c>(to, from);
}
-template<> EIGEN_STRONG_INLINE void pstoreu<uint8_t>(uint8_t* to, const Packet16uc& from)
+template<> EIGEN_STRONG_INLINE void pstoreu<uint8_t>(uint8_t* to, const Packet16uc& from)
{
- EIGEN_DEBUG_UNALIGNED_STORE
- vec_vsx_st(from, 0, to);
+ pstoreu_common<Packet16uc>(to, from);
}
-#endif
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
@@ -1143,29 +945,28 @@ template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGE
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { EIGEN_ALIGN16 float x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { EIGEN_ALIGN16 int x; vec_ste(a, 0, &x); return x; }
-template<> EIGEN_STRONG_INLINE short int pfirst<Packet8s>(const Packet8s& a) {
- EIGEN_ALIGN16 short int x;
+template<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) pfirst_common(const Packet& a) {
+ EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) x;
vec_ste(a, 0, &x);
return x;
}
+template<> EIGEN_STRONG_INLINE short int pfirst<Packet8s>(const Packet8s& a) {
+ return pfirst_common<Packet8s>(a);
+}
+
template<> EIGEN_STRONG_INLINE unsigned short int pfirst<Packet8us>(const Packet8us& a) {
- EIGEN_ALIGN16 unsigned short int x;
- vec_ste(a, 0, &x);
- return x;
+ return pfirst_common<Packet8us>(a);
}
template<> EIGEN_STRONG_INLINE int8_t pfirst<Packet16c>(const Packet16c& a)
{
- EIGEN_ALIGN16 int8_t x;
- vec_ste(a, 0, &x);
- return x;
+ return pfirst_common<Packet16c>(a);
}
+
template<> EIGEN_STRONG_INLINE uint8_t pfirst<Packet16uc>(const Packet16uc& a)
{
- EIGEN_ALIGN16 uint8_t x;
- vec_ste(a, 0, &x);
- return x;
+ return pfirst_common<Packet16uc>(a);
}
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
@@ -1237,44 +1038,37 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
return pfirst(sum);
}
-template<> EIGEN_STRONG_INLINE short int predux<Packet8s>(const Packet8s& a)
+template<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_size8(const Packet& a)
{
union{
- Packet8s v;
- short int n[8];
+ Packet v;
+ __UNPACK_TYPE__(Packet) n[8];
} vt;
vt.v = a;
- EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };
+ EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };
EIGEN_ALIGN16 int second_loader[4] = { vt.n[4], vt.n[5], vt.n[6], vt.n[7] };
Packet4i first_half = pload<Packet4i>(first_loader);
Packet4i second_half = pload<Packet4i>(second_loader);
- return static_cast<short int>(predux(first_half) + predux(second_half));
+ return static_cast<__UNPACK_TYPE__(Packet)>(predux(first_half) + predux(second_half));
}
-template<> EIGEN_STRONG_INLINE unsigned short int predux<Packet8us>(const Packet8us& a)
+template<> EIGEN_STRONG_INLINE short int predux<Packet8s>(const Packet8s& a)
{
- union{
- Packet8us v;
- unsigned short int n[8];
- } vt;
- vt.v = a;
-
- //There is no predux for Packet4ui. So we are intentionally using int
- EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };
- EIGEN_ALIGN16 int second_loader[4] = { vt.n[4], vt.n[5], vt.n[6], vt.n[7] };
- Packet4i first_half = pload<Packet4i>(first_loader);
- Packet4i second_half = pload<Packet4i>(second_loader);
+ return predux_size8<Packet8s>(a);
+}
- return static_cast<unsigned short int>(predux(first_half) + predux(second_half));
+template<> EIGEN_STRONG_INLINE unsigned short int predux<Packet8us>(const Packet8us& a)
+{
+ return predux_size8<Packet8us>(a);
}
-template<> EIGEN_STRONG_INLINE int8_t predux<Packet16c>(const Packet16c& a)
+template<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_size16(const Packet& a)
{
union{
- Packet16c v;
- int8_t n[16];
+ Packet v;
+ __UNPACK_TYPE__(Packet) n[16];
} vt;
vt.v = a;
@@ -1288,33 +1082,19 @@ template<> EIGEN_STRONG_INLINE int8_t predux<Packet16c>(const Packet16c& a)
Packet4i third_quarter = pload<Packet4i>(third_loader);
Packet4i fourth_quarter = pload<Packet4i>(fourth_loader);
- return static_cast<int8_t>(predux(first_quarter) + predux(second_quarter)
+ return static_cast<__UNPACK_TYPE__(Packet)>(predux(first_quarter) + predux(second_quarter)
+ predux(third_quarter) + predux(fourth_quarter));
}
-template<> EIGEN_STRONG_INLINE uint8_t predux<Packet16uc>(const Packet16uc& a)
+template<> EIGEN_STRONG_INLINE int8_t predux<Packet16c>(const Packet16c& a)
{
- union{
- Packet16uc v;
- uint8_t n[16];
- } vt;
- vt.v = a;
-
- EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };
- EIGEN_ALIGN16 int second_loader[4] = { vt.n[4], vt.n[5], vt.n[6], vt.n[7] };
- EIGEN_ALIGN16 int third_loader[4] = { vt.n[8], vt.n[9], vt.n[10], vt.n[11] };
- EIGEN_ALIGN16 int fourth_loader[4] = { vt.n[12], vt.n[13], vt.n[14], vt.n[15] };
-
- Packet4i first_quarter = pload<Packet4i>(first_loader);
- Packet4i second_quarter = pload<Packet4i>(second_loader);
- Packet4i third_quarter = pload<Packet4i>(third_loader);
- Packet4i fourth_quarter = pload<Packet4i>(fourth_loader);
-
-
- return static_cast<uint8_t>(predux(first_quarter) + predux(second_quarter)
- + predux(third_quarter) + predux(fourth_quarter));
+ return predux_size16<Packet16c>(a);
}
+template<> EIGEN_STRONG_INLINE uint8_t predux<Packet16uc>(const Packet16uc& a)
+{
+ return predux_size16<Packet16uc>(a);
+}
// Other reduction functions:
// mul
@@ -1379,20 +1159,24 @@ template<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet16uc>(const Packet16uc&
}
// min
-template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+template<typename Packet> EIGEN_STRONG_INLINE
+__UNPACK_TYPE__(Packet) predux_min4(const Packet& a)
{
- Packet4f b, res;
+ Packet b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return pfirst(res);
}
+
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ return predux_min4<Packet4f>(a);
+}
+
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
- Packet4i b, res;
- b = vec_min(a, vec_sld(a, a, 8));
- res = vec_min(b, vec_sld(b, b, 4));
- return pfirst(res);
+ return predux_min4<Packet4i>(a);
}
template<> EIGEN_STRONG_INLINE short int predux_min<Packet8s>(const Packet8s& a)
@@ -1449,20 +1233,22 @@ template<> EIGEN_STRONG_INLINE uint8_t predux_min<Packet16uc>(const Packet16uc&
return pfirst(result);
}
// max
-template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+template<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_max4(const Packet& a)
{
- Packet4f b, res;
+ Packet b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return pfirst(res);
}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ return predux_max4<Packet4f>(a);
+}
+
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
- Packet4i b, res;
- b = vec_max(a, vec_sld(a, a, 8));
- res = vec_max(b, vec_sld(b, b, 4));
- return pfirst(res);
+ return predux_max4<Packet4i>(a);
}
template<> EIGEN_STRONG_INLINE short int predux_max<Packet8s>(const Packet8s& a)
@@ -1524,9 +1310,9 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return vec_any_ne(x, pzero(x));
}
-EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<Packet4f,4>& kernel) {
- Packet4f t0, t1, t2, t3;
+template <typename T> EIGEN_DEVICE_FUNC inline void
+ptranpose_common(PacketBlock<T,4>& kernel){
+ T t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
@@ -1538,16 +1324,13 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
}
EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ ptranpose_common<Packet4f>(kernel);
+}
+
+EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
- Packet4i t0, t1, t2, t3;
- t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
- t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
- t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
- t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
- kernel.packet[0] = vec_mergeh(t0, t2);
- kernel.packet[1] = vec_mergel(t0, t2);
- kernel.packet[2] = vec_mergeh(t1, t3);
- kernel.packet[3] = vec_mergel(t1, t3);
+ ptranpose_common<Packet4i>(kernel);
}
EIGEN_DEVICE_FUNC inline void
@@ -1811,16 +1594,19 @@ ptranspose(PacketBlock<Packet16uc,16>& kernel) {
kernel.packet[15] = vec_mergel(step3[7], step3[15]);
}
-template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+template<typename Packet> EIGEN_STRONG_INLINE
+Packet pblend4(const Selector<4>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
}
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ return pblend4<Packet4i>(ifPacket, thenPacket, elsePacket);
+}
+
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
- Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
- Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
- return vec_sel(elsePacket, thenPacket, mask);
+ return pblend4<Packet4f>(ifPacket, thenPacket, elsePacket);
}
template<> EIGEN_STRONG_INLINE Packet8s pblend(const Selector<8>& ifPacket, const Packet8s& thenPacket, const Packet8s& elsePacket) {