From 8ce9630ddbce28c8dbd8f027239ee7026f5abd49 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 15 May 2020 20:47:32 +0200 Subject: Fix #1874: workaround MSVC 2017 compilation issue. --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 168 +++++++++++----------- 1 file changed, 82 insertions(+), 86 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 64e7f79cf..73198b3a2 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -2682,100 +2682,96 @@ struct gemm_pack_rhs::size, HasHalf = (int)HalfPacketSize < (int)PacketSize, HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize }; - EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0); -}; - -template -EIGEN_DONT_INLINE void gemm_pack_rhs - ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) -{ - EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); - EIGEN_UNUSED_VARIABLE(stride); - EIGEN_UNUSED_VARIABLE(offset); - eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - conj_if::IsComplex && Conjugate> cj; - Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; - Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; - Index count = 0; - -// if(nr>=8) -// { -// for(Index j2=0; j2(&rhs[k*rhsStride + j2]); -// pstoreu(blockB+count, cj.pconj(A)); -// } else if (PacketSize==4) { -// Packet A = ploadu(&rhs[k*rhsStride + j2]); -// Packet B = ploadu(&rhs[k*rhsStride + j2 + PacketSize]); -// pstoreu(blockB+count, cj.pconj(A)); -// pstoreu(blockB+count+PacketSize, cj.pconj(B)); -// } else { -// const Scalar* b0 = &rhs[k*rhsStride + j2]; -// blockB[count+0] = cj(b0[0]); -// blockB[count+1] = cj(b0[1]); -// blockB[count+2] = cj(b0[2]); -// blockB[count+3] = cj(b0[3]); -// blockB[count+4] = cj(b0[4]); -// blockB[count+5] = cj(b0[5]); -// blockB[count+6] = cj(b0[6]); -// blockB[count+7] = cj(b0[7]); -// } -// count += 8; -// } -// // skip what we have after -// if(PanelMode) count += 8 * (stride-offset-depth); -// } -// } - if(nr>=4) - { - for(Index j2=packet_cols8; j2=depth && offset<=stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; + Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; + Index count = 0; + + // if(nr>=8) + // { + // for(Index j2=0; j2(&rhs[k*rhsStride + j2]); + // pstoreu(blockB+count, cj.pconj(A)); + // } else if (PacketSize==4) { + // Packet A = ploadu(&rhs[k*rhsStride + j2]); + // Packet B = ploadu(&rhs[k*rhsStride + j2 + PacketSize]); + // pstoreu(blockB+count, cj.pconj(A)); + // pstoreu(blockB+count+PacketSize, cj.pconj(B)); + // } else { + // const Scalar* b0 = &rhs[k*rhsStride + j2]; + // blockB[count+0] = cj(b0[0]); + // blockB[count+1] = cj(b0[1]); + // blockB[count+2] = cj(b0[2]); + // blockB[count+3] = cj(b0[3]); + // blockB[count+4] = cj(b0[4]); + // blockB[count+5] = cj(b0[5]); + // blockB[count+6] = cj(b0[6]); + // blockB[count+7] = cj(b0[7]); + // } + // count += 8; + // } + // // skip what we have after + // if(PanelMode) count += 8 * (stride-offset-depth); + // } + // } + if(nr>=4) { - // skip what we have before - if(PanelMode) count += 4 * offset; - for(Index k=0; k(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += PacketSize; - } else if (HasHalf && HalfPacketSize==4) { - HalfPacket A = rhs.template loadPacket(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += HalfPacketSize; - } else if (HasQuarter && QuarterPacketSize==4) { - QuarterPacket A = rhs.template loadPacket(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += QuarterPacketSize; - } else { - const LinearMapper dm0 = rhs.getLinearMapper(k, j2); - blockB[count+0] = cj(dm0(0)); - blockB[count+1] = cj(dm0(1)); - blockB[count+2] = cj(dm0(2)); - blockB[count+3] = cj(dm0(3)); - count += 4; + // skip what we have before + if(PanelMode) count += 4 * offset; + for(Index k=0; k(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += PacketSize; + } else if (HasHalf && HalfPacketSize==4) { + HalfPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += HalfPacketSize; + } else if (HasQuarter && QuarterPacketSize==4) { + QuarterPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += QuarterPacketSize; + } else { + const LinearMapper dm0 = rhs.getLinearMapper(k, j2); + blockB[count+0] = cj(dm0(0)); + blockB[count+1] = cj(dm0(1)); + blockB[count+2] = cj(dm0(2)); + blockB[count+3] = cj(dm0(3)); + count += 4; + } } + // skip what we have after + if(PanelMode) count += 4 * (stride-offset-depth); } - // skip what we have after - if(PanelMode) count += 4 * (stride-offset-depth); } - } - // copy the remaining columns one at a time (nr==1) - for(Index j2=packet_cols4; j2