aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 10:56:18 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 10:56:18 +0200
commit3d8d0f6269478a06f4fcbd4b838c8e9b9d7e9d62 (patch)
tree9f55e81498c1acc61ea0c10bce0ead69a71875d1 /Eigen/src/Core/arch/SSE
parentb0e19db1cf462a07e25429d4f04f7d8e858f670f (diff)
Enable vectorization of pack_rhs with a column-major RHS.
Rename and generalize Kernel<*> to PacketBlock<*,N>.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h12
2 files changed, 8 insertions, 8 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index d0c080c4f..758183c18 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -462,8 +462,8 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
return Packet1cd(preverse(Packet2d(x.v)));
}
-template<> EIGEN_DEVICE_FUNC inline void
-ptranspose(Kernel<Packet2cf>& kernel) {
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 4f9d8c4fd..ad935d5f1 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -784,20 +784,20 @@ struct palign_impl<Offset,Packet2d>
};
#endif
-template<> EIGEN_DEVICE_FUNC inline void
-ptranspose(Kernel<Packet4f>& kernel) {
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
}
-template<> EIGEN_DEVICE_FUNC inline void
-ptranspose(Kernel<Packet2d>& kernel) {
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
__m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
kernel.packet[1] = tmp;
}
-template<> EIGEN_DEVICE_FUNC inline void
-ptranspose(Kernel<Packet4i>& kernel) {
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
__m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
__m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
__m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);