diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-07-12 16:31:46 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-07-12 16:31:46 +0200 |
commit | b72b7ab76fd6f3389f358ca412a32f519063250c (patch) | |
tree | 89aef2066fae898fdf7302aebfc3b315defc7829 /Eigen/src/Core/products/SelfadjointMatrixMatrix.h | |
parent | f8678272a4244babe25cc92bbb9298ed922330a4 (diff) |
matrix product: move the alpha factor to gebp instead of the packing,
clean some temporaries, etc.
Diffstat (limited to 'Eigen/src/Core/products/SelfadjointMatrixMatrix.h')
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 64 |
1 files changed, 29 insertions, 35 deletions
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 27eb4b2f8..0488114b9 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -89,7 +89,7 @@ template<typename Scalar, typename Index, int nr, int StorageOrder> struct ei_symm_pack_rhs { enum { PacketSize = ei_packet_traits<Scalar>::size }; - void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Scalar alpha, Index rows, Index cols, Index k2) + void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { Index end_k = k2 + rows; Index count = 0; @@ -101,12 +101,12 @@ struct ei_symm_pack_rhs { for(Index k=k2; k<end_k; k++) { - blockB[count+0] = alpha*rhs(k,j2+0); - blockB[count+1] = alpha*rhs(k,j2+1); + blockB[count+0] = rhs(k,j2+0); + blockB[count+1] = rhs(k,j2+1); if (nr==4) { - blockB[count+2] = alpha*rhs(k,j2+2); - blockB[count+3] = alpha*rhs(k,j2+3); + blockB[count+2] = rhs(k,j2+2); + blockB[count+3] = rhs(k,j2+3); } count += nr; } @@ -119,12 +119,12 @@ struct ei_symm_pack_rhs // transpose for(Index k=k2; k<j2; k++) { - blockB[count+0] = alpha*ei_conj(rhs(j2+0,k)); - blockB[count+1] = alpha*ei_conj(rhs(j2+1,k)); + blockB[count+0] = ei_conj(rhs(j2+0,k)); + blockB[count+1] = ei_conj(rhs(j2+1,k)); if (nr==4) { - blockB[count+2] = alpha*ei_conj(rhs(j2+2,k)); - blockB[count+3] = alpha*ei_conj(rhs(j2+3,k)); + blockB[count+2] = ei_conj(rhs(j2+2,k)); + blockB[count+3] = ei_conj(rhs(j2+3,k)); } count += nr; } @@ -134,25 +134,25 @@ struct ei_symm_pack_rhs { // normal for (Index w=0 ; w<h; ++w) - blockB[count+w] = alpha*rhs(k,j2+w); + blockB[count+w] = rhs(k,j2+w); - blockB[count+h] = alpha*rhs(k,k); + blockB[count+h] = rhs(k,k); // transpose for (Index w=h+1 ; w<nr; ++w) - blockB[count+w] = alpha*ei_conj(rhs(j2+w,k)); + blockB[count+w] = ei_conj(rhs(j2+w,k)); count += nr; ++h; } // normal for(Index k=j2+nr; k<end_k; k++) { - blockB[count+0] = alpha*rhs(k,j2+0); - blockB[count+1] = alpha*rhs(k,j2+1); + blockB[count+0] = rhs(k,j2+0); + blockB[count+1] = rhs(k,j2+1); if (nr==4) { - blockB[count+2] = alpha*rhs(k,j2+2); - blockB[count+3] = alpha*rhs(k,j2+3); + blockB[count+2] = rhs(k,j2+2); + blockB[count+3] = rhs(k,j2+3); } count += nr; } @@ -163,12 +163,12 @@ struct ei_symm_pack_rhs { for(Index k=k2; k<end_k; k++) { - blockB[count+0] = alpha*ei_conj(rhs(j2+0,k)); - blockB[count+1] = alpha*ei_conj(rhs(j2+1,k)); + blockB[count+0] = ei_conj(rhs(j2+0,k)); + blockB[count+1] = ei_conj(rhs(j2+1,k)); if (nr==4) { - blockB[count+2] = alpha*ei_conj(rhs(j2+2,k)); - blockB[count+3] = alpha*ei_conj(rhs(j2+3,k)); + blockB[count+2] = ei_conj(rhs(j2+2,k)); + blockB[count+3] = ei_conj(rhs(j2+3,k)); } count += nr; } @@ -181,13 +181,13 @@ struct ei_symm_pack_rhs Index half = std::min(end_k,j2); for(Index k=k2; k<half; k++) { - blockB[count] = alpha*ei_conj(rhs(j2,k)); + blockB[count] = ei_conj(rhs(j2,k)); count += 1; } if(half==j2 && half<k2+rows) { - blockB[count] = alpha*ei_real(rhs(j2,j2)); + blockB[count] = ei_real(rhs(j2,j2)); count += 1; } else @@ -196,7 +196,7 @@ struct ei_symm_pack_rhs // normal for(Index k=half+1; k<k2+rows; k++) { - blockB[count] = alpha*rhs(k,j2); + blockB[count] = rhs(k,j2); count += 1; } } @@ -253,9 +253,6 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - if (ConjugateRhs) - alpha = ei_conj(alpha); - typedef ei_product_blocking_traits<Scalar,Scalar> Blocking; Index kc = size; // cache block size along the K direction @@ -282,7 +279,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate // we have selected one row panel of rhs and one column panel of lhs // pack rhs's panel into a sequential chunk of memory // and expand each coeff to a constant packet for further reuse - pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols); + pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols); // the select lhs's panel has to be split in three different parts: // 1 - the transposed panel above the diagonal block => transposed packed copy @@ -294,7 +291,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate // transposed packed copy pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc); - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } // the block diagonal { @@ -302,7 +299,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate // symmetric packed copy pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc); - gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } for(Index i2=k2+kc; i2<size; i2+=mc) @@ -311,7 +308,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>() (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } } @@ -338,9 +335,6 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); - if (ConjugateRhs) - alpha = ei_conj(alpha); - typedef ei_product_blocking_traits<Scalar,Scalar> Blocking; Index kc = size; // cache block size along the K direction @@ -361,7 +355,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat { const Index actual_kc = std::min(k2+kc,size)-k2; - pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2); + pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2); // => GEPP for(Index i2=0; i2<rows; i2+=mc) @@ -369,7 +363,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat const Index actual_mc = std::min(i2+mc,rows)-i2; pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } } |