From ce99b502ce1d80129a028ddf02fff51f6c51249b Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Tue, 17 Dec 2013 10:49:43 -0800
Subject: Use vectorization when packing row-major rhs matrices. (bug #717)

---
 Eigen/src/Core/products/GeneralBlockPanelKernel.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 08cc14bd7..686ff84f1 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1261,6 +1261,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
 template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
 struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
 {
+  typedef typename packet_traits<Scalar>::type Packet;
   enum { PacketSize = packet_traits<Scalar>::size };
   EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
 };
@@ -1282,12 +1283,18 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
     if(PanelMode) count += nr * offset;
     for(Index k=0; k<depth; k++)
     {
-      const Scalar* b0 = &rhs[k*rhsStride + j2];
-                blockB[count+0] = cj(b0[0]);
-                blockB[count+1] = cj(b0[1]);
-      if(nr==4) blockB[count+2] = cj(b0[2]);
-      if(nr==4) blockB[count+3] = cj(b0[3]);
-      count += nr;
+      if (PacketSize==nr) {
+        Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+        pstoreu(blockB+count, cj.pconj(A));
+        count += PacketSize;
+      } else {
+        const Scalar* b0 = &rhs[k*rhsStride + j2];
+        blockB[count+0] = cj(b0[0]);
+        blockB[count+1] = cj(b0[1]);
+        if(nr==4) blockB[count+2] = cj(b0[2]);
+        if(nr==4) blockB[count+3] = cj(b0[3]);
+        count += nr;
+      }
     }
     // skip what we have after
     if(PanelMode) count += nr * (stride-offset-depth);
-- 
cgit v1.2.3