diff options
author | Chip Kerchner <chip.kerchner@ibm.com> | 2021-02-17 17:49:23 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-02-17 17:49:23 +0000 |
commit | 9b51dc7972c9f64727e9c8e8db0c60aaf9aae532 (patch) | |
tree | fc74a4266657205346b26ae7a2c78a06a9cb505e /Eigen/src/Core/arch/AltiVec/PacketMath.h | |
parent | be0574e2159ce3d6a1748ba6060bea5dedccdbc9 (diff) |
Fixed performance issues for VSX and P10 MMA in general_matrix_matrix_product
Diffstat (limited to 'Eigen/src/Core/arch/AltiVec/PacketMath.h')
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 11 |
1 files changed, 5 insertions, 6 deletions
```diff
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index df04b8e0f..495afac90 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -2338,12 +2338,11 @@ template<> EIGEN_STRONG_INLINE void
 pbroadcast4<Packet2d>(const double *a,
                       Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
 {
-  a1 = pload<Packet2d>(a);
-  a0 = vec_splat_dbl<0>(a1);
-  a1 = vec_splat_dbl<1>(a1);
-  a3 = pload<Packet2d>(a+2);
-  a2 = vec_splat_dbl<0>(a3);
-  a3 = vec_splat_dbl<1>(a3);
+  //This way is faster than vec_splat (at least for doubles in Power 9)
+  a0 = pset1<Packet2d>(a[0]);
+  a1 = pset1<Packet2d>(a[1]);
+  a2 = pset1<Packet2d>(a[2]);
+  a3 = pset1<Packet2d>(a[3]);
 }
 
 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
```