aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AltiVec
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 02:46:57 -0700
committerGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 02:46:57 -0700
commit2dbfd83424cd0d30dac3b42b27b970b44a4e4541 (patch)
treef9fa5f32f9bf5526a0b67d325482fbf10c7d3811 /Eigen/src/Core/arch/AltiVec
parent7388fdf560ad0c1af036c60bb926a14d422ecda3 (diff)
Implement pbroadcast4 on altivec
Diffstat (limited to 'Eigen/src/Core/arch/AltiVec')
-rw-r--r--Eigen/src/Core/arch/AltiVec/Complex.h2
-rwxr-xr-xEigen/src/Core/arch/AltiVec/PacketMath.h26
2 files changed, 25 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 8fdffad5e..5409ddedd 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -229,7 +229,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
}
-template<> EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
{
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 8a67354e4..0e9adf450 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -168,6 +168,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
return vc;
}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = vec_ld(0,a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = vec_ld(0,a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
{
float EIGEN_ALIGN16 af[4];
@@ -538,7 +560,7 @@ struct palign_impl<Offset,Packet4i>
}
};
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
Packet4f t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
@@ -551,7 +573,7 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
kernel.packet[3] = vec_mergel(t1, t3);
}
-template<> EIGEN_DEVICE_FUNC inline void
+EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
Packet4i t0, t1, t2, t3;
t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);