diff options
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 106 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularSolverMatrix.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/util/BlasUtil.h | 45 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 75 |
9 files changed, 106 insertions, 146 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 3ec8eb082..fa844815d 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1025,9 +1025,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r0.loadPacket(2 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C8, alphav, R2); @@ -1035,9 +1035,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r0.storePacket(1 * Traits::ResPacketSize, R1); r0.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r1.loadPacket(0 * Traits::ResPacketSize); - R1 = r1.loadPacket(1 * Traits::ResPacketSize); - R2 = r1.loadPacket(2 * Traits::ResPacketSize); + R0 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C1, alphav, R0); traits.acc(C5, alphav, R1); traits.acc(C9, alphav, R2); @@ -1045,9 +1045,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r1.storePacket(1 * Traits::ResPacketSize, R1); r1.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 = r2.loadPacket(1 * Traits::ResPacketSize); - R2 = r2.loadPacket(2 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r2.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C6, alphav, R1); traits.acc(C10, alphav, R2); @@ -1055,9 +1055,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r2.storePacket(1 * Traits::ResPacketSize, R1); r2.storePacket(2 * Traits::ResPacketSize, R2); - R0 = r3.loadPacket(0 * Traits::ResPacketSize); - R1 = r3.loadPacket(1 * Traits::ResPacketSize); - R2 = r3.loadPacket(2 * Traits::ResPacketSize); + R0 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C3, alphav, R0); traits.acc(C7, alphav, R1); traits.acc(C11, alphav, R2); @@ -1134,9 +1134,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r0.loadPacket(2 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C8, alphav, R2); @@ -1244,10 +1244,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1, R2, R3; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); - R2 = r1.loadPacket(0 * Traits::ResPacketSize); - R3 = r1.loadPacket(1 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R3 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); traits.acc(C1, alphav, R2); @@ -1257,10 +1257,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga r1.storePacket(0 * Traits::ResPacketSize, R2); r1.storePacket(1 * Traits::ResPacketSize, R3); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 = r2.loadPacket(1 * Traits::ResPacketSize); - R2 = r3.loadPacket(0 * Traits::ResPacketSize); - R3 = r3.loadPacket(1 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R3 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C6, alphav, R1); traits.acc(C3, alphav, R2); @@ -1337,8 +1337,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r0.loadPacket(1 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C4, alphav, R1); r0.storePacket(0 * Traits::ResPacketSize, R0); @@ -1431,15 +1431,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga ResPacket R0, R1; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); - R1 = r1.loadPacket(0 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); traits.acc(C1, alphav, R1); r0.storePacket(0 * Traits::ResPacketSize, R0); r1.storePacket(0 * Traits::ResPacketSize, R1); - R0 = r2.loadPacket(0 * Traits::ResPacketSize); - R1 = r3.loadPacket(0 * Traits::ResPacketSize); + R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); + R1 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C2, alphav, R0); traits.acc(C3, alphav, R1); r2.storePacket(0 * Traits::ResPacketSize, R0); @@ -1504,7 +1504,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga #undef EIGEN_GEBGP_ONESTEP ResPacket R0; ResPacket alphav = pset1<ResPacket>(alpha); - R0 = r0.loadPacket(0 * Traits::ResPacketSize); + R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize); traits.acc(C0, alphav, R0); r0.storePacket(0 * Traits::ResPacketSize, R0); } @@ -1685,19 +1685,18 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga // // 32 33 34 35 ... // 36 36 38 39 ... -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> { typedef typename DataMapper::LinearMapper LinearMapper; EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0); }; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - typedef typename packet_traits<Scalar>::type Packet; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { PacketSize = unpacket_traits<Packet>::size }; EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); EIGEN_UNUSED_VARIABLE(stride); @@ -1725,9 +1724,9 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A, B, C; - A = lhs.loadPacket(i+0*PacketSize, k); - B = lhs.loadPacket(i+1*PacketSize, k); - C = lhs.loadPacket(i+2*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); + B = lhs.template loadPacket<Packet>(i+1*PacketSize, k); + C = lhs.template loadPacket<Packet>(i+2*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; pstore(blockA+count, cj.pconj(B)); count+=PacketSize; pstore(blockA+count, cj.pconj(C)); count+=PacketSize; @@ -1745,8 +1744,8 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A, B; - A = lhs.loadPacket(i+0*PacketSize, k); - B = lhs.loadPacket(i+1*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); + B = lhs.template loadPacket<Packet>(i+1*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } @@ -1763,7 +1762,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co for(Index k=0; k<depth; k++) { Packet A; - A = lhs.loadPacket(i+0*PacketSize, k); + A = lhs.template loadPacket<Packet>(i+0*PacketSize, k); pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } @@ -1793,19 +1792,18 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co } } -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> { typedef typename DataMapper::LinearMapper LinearMapper; EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0); }; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode> -EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode> +EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - typedef typename packet_traits<Scalar>::type Packet; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { PacketSize = unpacket_traits<Packet>::size }; EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); EIGEN_UNUSED_VARIABLE(stride); @@ -1837,7 +1835,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Ro for (Index m = 0; m < pack; m += PacketSize) { PacketBlock<Packet> kernel; - for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k); + for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.template loadPacket<Packet>(i+p+m, k); ptranspose(kernel); for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p])); } @@ -1971,10 +1969,10 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co { for(; k<peeled_k; k+=PacketSize) { PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel; - kernel.packet[0] = dm0.loadPacket(k); - kernel.packet[1%PacketSize] = dm1.loadPacket(k); - kernel.packet[2%PacketSize] = dm2.loadPacket(k); - kernel.packet[3%PacketSize] = dm3.loadPacket(k); + kernel.packet[0 ] = dm0.template loadPacket<Packet>(k); + kernel.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k); + kernel.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k); + kernel.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k); ptranspose(kernel); pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0])); pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize])); @@ -2075,7 +2073,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Co for(Index k=0; k<depth; k++) { if (PacketSize==4) { - Packet A = rhs.loadPacket(k, j2); + Packet A = rhs.template loadPacket<Packet>(k, j2); pstoreu(blockB+count, cj.pconj(A)); count += PacketSize; } else { diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index bd7b6ff2a..a010e150f 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -75,7 +75,7 @@ static void run(Index rows, Index cols, Index depth, Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction - gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index e436c50a4..27a7921fa 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -84,7 +84,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); - gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; @@ -110,7 +110,6 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, -1, -1, 0, 0); - sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha); if (UpLo==Upper) @@ -160,7 +159,7 @@ struct tribb_kernel if(UpLo==Upper) gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, -1, -1, 0, 0); - + // selfadjoint micro block { Index i = j; @@ -168,6 +167,7 @@ struct tribb_kernel // 1 - apply the kernel on the temporary buffer gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, -1, -1, 0, 0); + // 2 - triangular accumulation for(Index j1=0; j1<actualBlockSize; ++j1) { diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index da6f82abc..c43a53b92 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -352,7 +352,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs; - gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; + gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; for(Index k2=0; k2<size; k2+=kc) { @@ -387,7 +387,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t for(Index i2=k2+kc; i2<size; i2+=mc) { const Index actual_mc = (std::min)(i2+mc,size)-i2; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>() + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder,false>() (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc); gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha); @@ -437,7 +437,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs; symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; for(Index k2=0; k2<size; k2+=kc) diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index f784507e7..7fe476be2 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -151,7 +151,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, triangularBuffer.diagonal().setOnes(); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs; for(Index k2=IsLower ? depth : 0; @@ -222,7 +222,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, for(Index i2=start; i2<end; i2+=mc) { const Index actual_mc = (std::min)(i2+mc,end)-i2; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder,false>() (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc); gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, @@ -299,7 +299,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, triangularBuffer.diagonal().setOnes(); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 223c38b86..96d1b38a9 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -76,7 +76,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju conj_if<Conjugate> conj; gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; - gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, TriStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs; // the goal here is to subdivise the Rhs panels such that we keep some cache @@ -229,7 +229,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, ColMajor, false, true> pack_lhs_panel; for(Index k2=IsLower ? size : 0; IsLower ? k2>0 : k2<size; diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 705925984..a32630ed7 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -24,7 +24,7 @@ struct gebp_kernel; template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> struct gemm_pack_rhs; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false> struct gemm_pack_lhs; template< @@ -156,11 +156,9 @@ class BlasVectorMapper { }; template<typename Scalar, typename Index, int AlignmentType> -class BlasLinearMapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - +class BlasLinearMapper +{ +public: EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { @@ -171,29 +169,25 @@ class BlasLinearMapper { return m_data[i]; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return ploadt<Packet, AlignmentType>(m_data + i); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return ploadt<PacketType, AlignmentType>(m_data + i); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return ploadt<HalfPacket, AlignmentType>(m_data + i); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { - pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); - } - - protected: +protected: Scalar *m_data; }; // Lightweight helper class to access matrix coefficients. template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned> -class blas_data_mapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - +class blas_data_mapper +{ +public: typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper; @@ -218,8 +212,9 @@ class blas_data_mapper { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return ploadt<Packet, AlignmentType>(&operator()(i, j)); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return ploadt<PacketType, AlignmentType>(&operator()(i, j)); } template <typename PacketT, int AlignmentT> @@ -227,10 +222,6 @@ class blas_data_mapper { return ploadt<PacketT, AlignmentT>(&operator()(i, j)); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); - } - template<typename SubPacket> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); @@ -251,7 +242,7 @@ class blas_data_mapper { return internal::first_default_aligned(m_data, size); } - protected: +protected: Scalar* EIGEN_RESTRICT m_data; const Index m_stride; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index f0f61fade..9614aeefe 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -620,7 +620,7 @@ struct TensorContractionEvaluatorBase typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; // Declare GEBP packing and kernel structs - internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs; + internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, typename Traits::LhsPacket, ColMajor> pack_lhs; internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs; internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index ab320a50d..dbb0f76bb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -238,9 +238,6 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, const contract_t& k_strides) : ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - typedef typename Tensor::PacketReturnType Packet; - typedef typename unpacket_traits<Packet>::half HalfPacket; - template <typename PacketT,int AlignmentType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketT load(Index i, Index j) const { @@ -284,27 +281,10 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, return pload<PacketT>(data); } - template <int AlignmentType> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - return this->load<Packet,AlignmentType>(i,j); - } - - template <int AlignmentType> + template <typename PacketT,int AlignmentType> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - const Index half_packet_size = unpacket_traits<HalfPacket>::size; - if (half_packet_size == packet_size) { - return loadPacket<AlignmentType>(i, j); - } - EIGEN_ALIGN_MAX Scalar data[half_packet_size]; - for (Index k = 0; k < half_packet_size; k++) { - data[k] = operator()(i + k, j); - } - return pload<HalfPacket>(data); + EIGEN_STRONG_INLINE PacketT loadPacket(Index i, Index j) const { + return this->load<PacketT,AlignmentType>(i,j); } }; @@ -314,7 +294,8 @@ template<typename Scalar, typename Index, int side, typename nocontract_t, typename contract_t, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, template <class> class MakePointer_> -class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment, MakePointer_> +class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> + : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment, MakePointer_> { public: typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment, MakePointer_> ParentMapper; @@ -327,12 +308,11 @@ class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, con const contract_t& k_strides) : ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - typedef typename Tensor::PacketReturnType Packet; - template <int> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { + template <typename PacketT,int> EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PacketT loadPacket(Index i, Index j) const { EIGEN_ALIGN_MAX Scalar data[1]; data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - return pload<typename Tensor::PacketReturnType>(data); + return pload<PacketT>(data); } template <typename PacketT,int> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketT load(Index i, Index j) const { @@ -340,10 +320,6 @@ class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, con data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); return pload<PacketT>(data); } - template <int> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { - return loadPacket(i, j); - } }; @@ -354,8 +330,6 @@ template<typename Scalar, typename Index, int side, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, template <class> class MakePointer_=MakePointer> class TensorContractionSubMapper { public: - typedef typename Tensor::PacketReturnType Packet; - typedef typename unpacket_traits<Packet>::half HalfPacket; typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> ParentMapper; typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> Self; @@ -390,17 +364,20 @@ class TensorContractionSubMapper { return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + template <typename PacketT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i) const { if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<Alignment>(i, 0); + return m_base_mapper.template loadPacket<PacketT,Alignment>(i, 0); } - return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset); + return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, m_horiz_offset); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + + template <typename PacketT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const { if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<Alignment>(i, j); + return m_base_mapper.template loadPacket<PacketT,Alignment>(i, j); } - return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset); + return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, j + m_horiz_offset); } template <typename PacketT, int AlignmentType> @@ -411,14 +388,8 @@ class TensorContractionSubMapper { return m_base_mapper.template loadPacket<PacketT,AlignmentType>(i + m_vert_offset, j + m_horiz_offset); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - if (UseDirectOffsets) { - return m_base_mapper.template loadHalfPacket<Alignment>(i, 0); - } - return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet& p) const { + template <typename PacketT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketT& p) const { if (UseDirectOffsets) { m_base_mapper.storePacket(i, 0, p); } @@ -434,15 +405,15 @@ class TensorContractionSubMapper { template <typename PacketT, int AlignmentType> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::is_same<PacketT, PacketT>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned; if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<ActualAlignment>(i, 0); + return m_base_mapper.template loadPacket<PacketT,ActualAlignment>(i, 0); } - return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset); + return m_base_mapper.template loadPacket<PacketT,ActualAlignment>(i + m_vert_offset, m_horiz_offset); } - template <typename Packet> + template <typename PacketT> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { return false; } |