about summary refs log tree commit diff homepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h 18
1 files changed, 17 insertions, 1 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index eeeb5290f..28c2a913e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1033,6 +1033,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, Pan
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
for(Index j2=0; j2<packet_cols; j2+=nr)
{
// skip what we have before
@@ -1045,7 +1046,22 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, Pan
const Scalar* b5 = &rhs[(j2+5)*rhsStride];
const Scalar* b6 = &rhs[(j2+6)*rhsStride];
const Scalar* b7 = &rhs[(j2+7)*rhsStride];
- for(Index k=0; k<depth; k++)
+ Index k=0;
+ if(nr == PacketSize)
+ {
+ for(; k<peeled_k; k+=PacketSize) {
+ Kernel<Packet> kernel;
+ for (int p = 0; p < PacketSize; ++p) {
+ kernel.packet[p] = ploadu<Packet>(&rhs[(j2+p)*rhsStride+k]);
+ }
+ ptranspose(kernel);
+ for (int p = 0; p < PacketSize; ++p) {
+ pstoreu(blockB+count, cj.pconj(kernel.packet[p]));
+ count+=PacketSize;
+ }
+ }
+ }
+ for(; k<depth; k++)
{
blockB[count+0] = cj(b0[k]);
blockB[count+1] = cj(b1[k]);