diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-04-25 02:46:57 -0700 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-04-25 02:46:57 -0700 |
commit | 2dbfd83424cd0d30dac3b42b27b970b44a4e4541 (patch) | |
tree | f9fa5f32f9bf5526a0b67d325482fbf10c7d3811 | |
parent | 7388fdf560ad0c1af036c60bb926a14d422ecda3 (diff) |
Implement pbroadcast4 on altivec
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 2 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 26 |
2 files changed, 25 insertions, 3 deletions
```diff
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 8fdffad5e..5409ddedd 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -229,7 +229,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
   return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
 }
 
-template<> EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
 {
   Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
   kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 8a67354e4..0e9adf450 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -168,6 +168,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
   return vc;
 }
 
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = vec_ld(0,a);
+  a0 = vec_splat(a3, 0);
+  a1 = vec_splat(a3, 1);
+  a2 = vec_splat(a3, 2);
+  a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+                      Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+  a3 = vec_ld(0,a);
+  a0 = vec_splat(a3, 0);
+  a1 = vec_splat(a3, 1);
+  a2 = vec_splat(a3, 2);
+  a3 = vec_splat(a3, 3);
+}
+
 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
 {
   float EIGEN_ALIGN16 af[4];
@@ -538,7 +560,7 @@ struct palign_impl<Offset,Packet4i>
   }
 };
 
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet4f,4>& kernel) {
   Packet4f t0, t1, t2, t3;
   t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
@@ -551,7 +573,7 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
   kernel.packet[3] = vec_mergel(t1, t3);
 }
 
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet4i,4>& kernel) {
   Packet4i t0, t1, t2, t3;
   t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
```