diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-09-20 17:01:24 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-09-20 17:01:24 +0200 |
commit | 71496b0e25158fe6c47dd8c959748b74be4ca94c (patch) | |
tree | b68794605ac9b8275459d108d7c8e1a37e75fe82 /Eigen/src/Core/products/GeneralBlockPanelKernel.h | |
parent | 5a30eed17e170af4aedca1a3ff0c10a8e65bf47e (diff) |
Fix gebp kernel for real+complex in case only reals are vectorized (e.g., AVX512).
This commit also removes "half-packet" from data-mappers: it was not used and was conceptually broken anyway.
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 106 |
1 files changed, 52 insertions, 54 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 3ec8eb082..fa844815d 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1025,9 +1025,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r0.loadPacket(2 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C8, alphav, R2); @@ -1035,9 +1035,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r0.storePacket(1 * Traits::ResPacketSize, R1); r0.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r1.loadPacket(0 * Traits::ResPacketSize); - R1 = r1.loadPacket(1 * Traits::ResPacketSize); - R2 = r1.loadPacket(2 * Traits::ResPacketSize); + R0 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C1, alphav, R0); traits.acc(C5, alphav, R1); traits.acc(C9, alphav, R2); @@ -1045,9 +1045,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r1.storePacket(1 * Traits::ResPacketSize, R1); r1.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 = r2.loadPacket(1 * Traits::ResPacketSize); - R2 = r2.loadPacket(2 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = 
r2.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C6, alphav, R1); traits.acc(C10, alphav, R2); @@ -1055,9 +1055,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r2.storePacket(1 * Traits::ResPacketSize, R1); r2.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r3.loadPacket(0 * Traits::ResPacketSize); - R1 = r3.loadPacket(1 * Traits::ResPacketSize); - R2 = r3.loadPacket(2 * Traits::ResPacketSize); + R0 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C3, alphav, R0); traits.acc(C7, alphav, R1); traits.acc(C11, alphav, R2); @@ -1134,9 +1134,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r0.loadPacket(2 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C8, alphav, R2); @@ -1244,10 +1244,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2, R3; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r1.loadPacket(0 * Traits::ResPacketSize); - R3 = r1.loadPacket(1 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R3 = r1.template 
loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C1, alphav, R2); @@ -1257,10 +1257,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r1.storePacket(0 * Traits::ResPacketSize, R2); r1.storePacket(1 * Traits::ResPacketSize, R3); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 = r2.loadPacket(1 * Traits::ResPacketSize); - R2 = r3.loadPacket(0 * Traits::ResPacketSize); - R3 = r3.loadPacket(1 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R3 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C6, alphav, R1); traits.acc(C3, alphav, R2); @@ -1337,8 +1337,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); r0.storePacket(0 * Traits::ResPacketSize, R0); @@ -1431,15 +1431,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r1.loadPacket(0 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C1, alphav, R1); r0.storePacket(0 * Traits::ResPacketSize, R0); r1.storePacket(0 * Traits::ResPacketSize, R1); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 
= r3.loadPacket(0 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C3, alphav, R1); r2.storePacket(0 * Traits::ResPacketSize, R0); @@ -1504,7 +1504,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga #undef EIGEN_GEBGP_ONESTEP ResPacket R0; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); r0.storePacket(0 * Traits::ResPacketSize, R0); } @@ -1685,19 +1685,18 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga // // 32 33 34 35 ... // 36 36 38 39 ... -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> { typedef typename DataMapper::LinearMapper LinearMapper; EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0); }; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> ::operator()(Scalar* blockA, const DataMapper& lhs, Index 
depth, Index rows, Index stride, Index offset) { - typedef typename packet_traits<Scalar>::type Packet; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { PacketSize = unpacket_traits<Packet>::size }; EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); EIGEN_UNUSED_VARIABLE(stride); @@ -1725,9 +1724,9 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A, B, C; - A = lhs.loadPacket(i+0*PacketSize, k); - B = lhs.loadPacket(i+1*PacketSize, k); - C = lhs.loadPacket(i+2*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); + B = lhs.template loadPacket<Packet>(i+1*PacketSize, k); + C = lhs.template loadPacket<Packet>(i+2*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; pstore(blockA+count, cj.pconj(B)); count+=PacketSize; pstore(blockA+count, cj.pconj(C)); count+=PacketSize; @@ -1745,8 +1744,8 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A, B; - A = lhs.loadPacket(i+0*PacketSize, k); - B = lhs.loadPacket(i+1*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); + B = lhs.template loadPacket<Packet>(i+1*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } @@ -1763,7 +1762,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A; - A = lhs.loadPacket(i+0*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } @@ -1793,19 +1792,18 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co } } -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode> +template<typename Scalar, 
typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> { typedef typename DataMapper::LinearMapper LinearMapper; EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0); }; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - typedef typename packet_traits<Scalar>::type Packet; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { PacketSize = unpacket_traits<Packet>::size }; EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); EIGEN_UNUSED_VARIABLE(stride); @@ -1837,7 +1835,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Ro for (Index m = 0; m < pack; m += PacketSize) { PacketBlock<Packet> kernel; - for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k); + for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.template loadPacket<Packet>(i+p+m, k); ptranspose(kernel); for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p])); } @@ -1971,10 +1969,10 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co { for(; k<peeled_k; k+=PacketSize) { PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel; - kernel.packet[0] = dm0.loadPacket(k); - kernel.packet[1%PacketSize] = 
dm1.loadPacket(k); - kernel.packet[2%PacketSize] = dm2.loadPacket(k); - kernel.packet[3%PacketSize] = dm3.loadPacket(k); + kernel.packet[0 ] = dm0.template loadPacket<Packet>(k); + kernel.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k); + kernel.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k); + kernel.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k); ptranspose(kernel); pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0])); pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize])); @@ -2075,7 +2073,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Co for(Index k=0; k<depth; k++) { if (PacketSize==4) { - Packet A = rhs.loadPacket(k, j2); + Packet A = rhs.template loadPacket<Packet>(k, j2); pstoreu(blockB+count, cj.pconj(A)); count += PacketSize; } else { |