diff options
author | 2010-03-04 18:47:52 +0100 | |
---|---|---|
committer | 2010-03-04 18:47:52 +0100 | |
commit | cefd9b888868bca6b23d67c0e6c69c49582508c3 (patch) | |
tree | e7cd032e6475afcee813cbefdf01d3c5bda33d9a /Eigen/src/Core/products | |
parent | 65eba35f98941a1d5c7ff6f854ed17224ef65b40 (diff) | |
parent | 8ed1ef446998dc35f738ad9984cf479dbfc2cc6c (diff) |
merge with default branch
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- | Eigen/src/Core/products/CoeffBasedProduct.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 26 |
3 files changed, 28 insertions, 22 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 3343b1875..e8016e915 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, - Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + res = lhs.row(row).cwiseProduct(rhs.col(col)).sum(); } }; @@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> { EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); + res = lhs.cwiseProduct(rhs.col(col)).sum(); } }; @@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> { EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); + res = lhs.row(row).cwiseProduct(rhs).sum(); } }; @@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> { EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); + res = lhs.cwiseProduct(rhs).sum(); } }; diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 6836a10de..8ac5afb05 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -31,6 +31,7 @@ #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else #define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); +// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); #endif // optimized GEneral packed Block * packed Panel product kernel @@ -146,7 +147,7 @@ struct ei_gebp_kernel #endif // performs "inner" product - // TODO let's check wether the flowing peeled loop could not be + // TODO let's check wether the folowing peeled loop could not be // optimized via optimal prefetching from one loop to the other const Scalar* blB = unpackedB; for(int k=0; k<peeled_kc; k+=4) @@ -409,6 +410,7 @@ struct ei_gebp_kernel CJMADD(A0,B2,C2,B2); B2 = ei_pload(&blB[14*PacketSize]); CJMADD(A0,B3,C3,B3); + A0 = ei_pload(&blA[3*PacketSize]); B3 = ei_pload(&blB[15*PacketSize]); CJMADD(A0,B0,C0,B0); diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 785045db4..2e71b5fd4 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -43,7 +43,10 @@ struct ei_symm_pack_lhs { for(int w=0; w<h; w++) blockA[count++] = ei_conj(lhs(k, i+w)); // transposed - for(int w=h; w<BlockRows; w++) + + blockA[count++] = ei_real(lhs(k,k)); // real (diagonal) + + for(int w=h+1; w<BlockRows; w++) blockA[count++] = lhs(i+w, k); // normal ++h; } @@ -71,8 +74,11 @@ struct ei_symm_pack_lhs // do the same with mr==1 for(int i=peeled_mc; i<rows; i++) { - for(int k=0; k<=i; k++) + for(int k=0; k<i; k++) blockA[count++] = lhs(i, k); // normal + + blockA[count++] = ei_real(lhs(i, i)); // real (diagonal) + for(int k=i+1; k<cols; k++) blockA[count++] = ei_conj(lhs(k, i)); // transposed } @@ -129,8 +135,11 @@ struct ei_symm_pack_rhs // normal for (int w=0 ; w<h; ++w) blockB[count+w] = alpha*rhs(k,j2+w); + + blockB[count+h] = alpha*rhs(k,k); + // transpose - for (int w=h ; w<nr; ++w) + for (int w=h+1 ; w<nr; ++w) blockB[count+w] = alpha*ei_conj(rhs(j2+w,k)); count += nr; ++h; @@ -175,8 +184,15 @@ struct ei_symm_pack_rhs blockB[count] = alpha*ei_conj(rhs(j2,k)); count += 1; } + + if(half==j2) + { + blockB[count] = alpha*ei_real(rhs(j2,j2)); + count += 1; + } + // normal - for(int k=half; k<k2+rows; k++) + for(int k=half+1; k<k2+rows; k++) { blockB[count] = alpha*rhs(k,j2); count += 1; @@ -389,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> * RhsBlasTraits::extractScalarFactor(m_rhs); ei_product_selfadjoint_matrix<Scalar, - EIGEN_LOGICAL_XOR(LhsIsUpper, + EIGEN_LOGICAL_XOR(LhsIsUpper, ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper, |