diff options
author | Gael Guennebaud <g.gael@free.fr> | 2020-05-15 20:47:32 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2020-05-15 20:47:32 +0200 |
commit | 8ce9630ddbce28c8dbd8f027239ee7026f5abd49 (patch) | |
tree | 8a1e0d406bf70357405a967d5a67b2d4e423cfb0 | |
parent | 9b411757abd8458f9689b1384c6bf75da9b82357 (diff) |
Fix #1874: workaround MSVC 2017 compilation issue.
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 168 |
1 files changed, 82 insertions, 86 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 64e7f79cf..73198b3a2 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -2682,100 +2682,96 @@ struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMo QuarterPacketSize = unpacket_traits<QuarterPacket>::size, HasHalf = (int)HalfPacketSize < (int)PacketSize, HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize }; - EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0); -}; - -template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode> -EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode> - ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) -{ - EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); - EIGEN_UNUSED_VARIABLE(stride); - EIGEN_UNUSED_VARIABLE(offset); - eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; - Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; - Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; - Index count = 0; - -// if(nr>=8) -// { -// for(Index j2=0; j2<packet_cols8; j2+=8) -// { -// // skip what we have before -// if(PanelMode) count += 8 * offset; -// for(Index k=0; k<depth; k++) -// { -// if (PacketSize==8) { -// Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); -// pstoreu(blockB+count, cj.pconj(A)); -// } else if (PacketSize==4) { -// Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); -// Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]); -// pstoreu(blockB+count, cj.pconj(A)); -// pstoreu(blockB+count+PacketSize, cj.pconj(B)); -// } else { -// const Scalar* b0 = &rhs[k*rhsStride + j2]; -// blockB[count+0] = cj(b0[0]); -// blockB[count+1] = cj(b0[1]); -// blockB[count+2] = cj(b0[2]); -// blockB[count+3] = cj(b0[3]); -// blockB[count+4] = cj(b0[4]); -// blockB[count+5] = cj(b0[5]); -// blockB[count+6] = cj(b0[6]); -// blockB[count+7] = cj(b0[7]); -// } -// count += 8; -// } -// // skip what we have after -// if(PanelMode) count += 8 * (stride-offset-depth); -// } -// } - if(nr>=4) - { - for(Index j2=packet_cols8; j2<packet_cols4; j2+=4) + EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0) + { + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(offset); + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; + Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; + Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; + Index count = 0; + + // if(nr>=8) + // { + // for(Index j2=0; j2<packet_cols8; j2+=8) + // { + // // skip what we have before + // if(PanelMode) count += 8 * offset; + // for(Index k=0; k<depth; k++) + // { + // if (PacketSize==8) { + // Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); + // pstoreu(blockB+count, cj.pconj(A)); + // } else if (PacketSize==4) { + // Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); + // Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]); + // pstoreu(blockB+count, cj.pconj(A)); + // pstoreu(blockB+count+PacketSize, cj.pconj(B)); + // } else { + // const Scalar* b0 = &rhs[k*rhsStride + j2]; + // blockB[count+0] = cj(b0[0]); + // blockB[count+1] = cj(b0[1]); + // blockB[count+2] = cj(b0[2]); + // blockB[count+3] = cj(b0[3]); + // blockB[count+4] = cj(b0[4]); + // blockB[count+5] = cj(b0[5]); + // blockB[count+6] = cj(b0[6]); + // blockB[count+7] = cj(b0[7]); + // } + // count += 8; + // } + // // skip what we have after + // if(PanelMode) count += 8 * (stride-offset-depth); + // } + // } + if(nr>=4) { - // skip what we have before - if(PanelMode) count += 4 * offset; - for(Index k=0; k<depth; k++) + for(Index j2=packet_cols8; j2<packet_cols4; j2+=4) { - if (PacketSize==4) { - Packet A = rhs.template loadPacket<Packet>(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += PacketSize; - } else if (HasHalf && HalfPacketSize==4) { - HalfPacket A = rhs.template loadPacket<HalfPacket>(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += HalfPacketSize; - } else if (HasQuarter && QuarterPacketSize==4) { - QuarterPacket A = rhs.template loadPacket<QuarterPacket>(k, j2); - pstoreu(blockB+count, cj.pconj(A)); - count += QuarterPacketSize; - } else { - const LinearMapper dm0 = rhs.getLinearMapper(k, j2); - blockB[count+0] = cj(dm0(0)); - blockB[count+1] = cj(dm0(1)); - blockB[count+2] = cj(dm0(2)); - blockB[count+3] = cj(dm0(3)); - count += 4; + // skip what we have before + if(PanelMode) count += 4 * offset; + for(Index k=0; k<depth; k++) + { + if (PacketSize==4) { + Packet A = rhs.template loadPacket<Packet>(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += PacketSize; + } else if (HasHalf && HalfPacketSize==4) { + HalfPacket A = rhs.template loadPacket<HalfPacket>(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += HalfPacketSize; + } else if (HasQuarter && QuarterPacketSize==4) { + QuarterPacket A = rhs.template loadPacket<QuarterPacket>(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += QuarterPacketSize; + } else { + const LinearMapper dm0 = rhs.getLinearMapper(k, j2); + blockB[count+0] = cj(dm0(0)); + blockB[count+1] = cj(dm0(1)); + blockB[count+2] = cj(dm0(2)); + blockB[count+3] = cj(dm0(3)); + count += 4; + } } + // skip what we have after + if(PanelMode) count += 4 * (stride-offset-depth); } - // skip what we have after - if(PanelMode) count += 4 * (stride-offset-depth); } - } - // copy the remaining columns one at a time (nr==1) - for(Index j2=packet_cols4; j2<cols; ++j2) - { - if(PanelMode) count += offset; - for(Index k=0; k<depth; k++) + // copy the remaining columns one at a time (nr==1) + for(Index j2=packet_cols4; j2<cols; ++j2) { - blockB[count] = cj(rhs(k, j2)); - count += 1; + if(PanelMode) count += offset; + for(Index k=0; k<depth; k++) + { + blockB[count] = cj(rhs(k, j2)); + count += 1; + } + if(PanelMode) count += stride-offset-depth; } - if(PanelMode) count += stride-offset-depth; } -} +}; } // end namespace internal |