about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/AltiVec/PacketMath.h
diff options
context:
space:
mode:
author: Chip Kerchner <chip.kerchner@ibm.com> 2021-02-17 17:49:23 +0000
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2021-02-17 17:49:23 +0000
commit: 9b51dc7972c9f64727e9c8e8db0c60aaf9aae532 (patch)
tree: fc74a4266657205346b26ae7a2c78a06a9cb505e /Eigen/src/Core/arch/AltiVec/PacketMath.h
parent: be0574e2159ce3d6a1748ba6060bea5dedccdbc9 (diff)
Fixed performance issues for VSX and P10 MMA in general_matrix_matrix_product
Diffstat (limited to 'Eigen/src/Core/arch/AltiVec/PacketMath.h')
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 11
1 files changed, 5 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index df04b8e0f..495afac90 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -2338,12 +2338,11 @@ template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{
- a1 = pload<Packet2d>(a);
- a0 = vec_splat_dbl<0>(a1);
- a1 = vec_splat_dbl<1>(a1);
- a3 = pload<Packet2d>(a+2);
- a2 = vec_splat_dbl<0>(a3);
- a3 = vec_splat_dbl<1>(a3);
+ //This way is faster than vec_splat (at least for doubles in Power 9)
+ a0 = pset1<Packet2d>(a[0]);
+ a1 = pset1<Packet2d>(a[1]);
+ a2 = pset1<Packet2d>(a[2]);
+ a3 = pset1<Packet2d>(a[3]);
}
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)