aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-07-12 16:31:46 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-07-12 16:31:46 +0200
commitb72b7ab76fd6f3389f358ca412a32f519063250c (patch)
tree89aef2066fae898fdf7302aebfc3b315defc7829 /Eigen/src/Core/products/SelfadjointMatrixMatrix.h
parentf8678272a4244babe25cc92bbb9298ed922330a4 (diff)
matrix product: move the alpha factor to gebp instead of the packing,
clean some temporaries, etc.
Diffstat (limited to 'Eigen/src/Core/products/SelfadjointMatrixMatrix.h')
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix.h64
1 files changed, 29 insertions, 35 deletions
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 27eb4b2f8..0488114b9 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -89,7 +89,7 @@ template<typename Scalar, typename Index, int nr, int StorageOrder>
struct ei_symm_pack_rhs
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
- void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Scalar alpha, Index rows, Index cols, Index k2)
+ void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
{
Index end_k = k2 + rows;
Index count = 0;
@@ -101,12 +101,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
- blockB[count+0] = alpha*rhs(k,j2+0);
- blockB[count+1] = alpha*rhs(k,j2+1);
+ blockB[count+0] = rhs(k,j2+0);
+ blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
- blockB[count+2] = alpha*rhs(k,j2+2);
- blockB[count+3] = alpha*rhs(k,j2+3);
+ blockB[count+2] = rhs(k,j2+2);
+ blockB[count+3] = rhs(k,j2+3);
}
count += nr;
}
@@ -119,12 +119,12 @@ struct ei_symm_pack_rhs
// transpose
for(Index k=k2; k<j2; k++)
{
- blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
- blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
+ blockB[count+0] = ei_conj(rhs(j2+0,k));
+ blockB[count+1] = ei_conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
- blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
+ blockB[count+2] = ei_conj(rhs(j2+2,k));
+ blockB[count+3] = ei_conj(rhs(j2+3,k));
}
count += nr;
}
@@ -134,25 +134,25 @@ struct ei_symm_pack_rhs
{
// normal
for (Index w=0 ; w<h; ++w)
- blockB[count+w] = alpha*rhs(k,j2+w);
+ blockB[count+w] = rhs(k,j2+w);
- blockB[count+h] = alpha*rhs(k,k);
+ blockB[count+h] = rhs(k,k);
// transpose
for (Index w=h+1 ; w<nr; ++w)
- blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
+ blockB[count+w] = ei_conj(rhs(j2+w,k));
count += nr;
++h;
}
// normal
for(Index k=j2+nr; k<end_k; k++)
{
- blockB[count+0] = alpha*rhs(k,j2+0);
- blockB[count+1] = alpha*rhs(k,j2+1);
+ blockB[count+0] = rhs(k,j2+0);
+ blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
- blockB[count+2] = alpha*rhs(k,j2+2);
- blockB[count+3] = alpha*rhs(k,j2+3);
+ blockB[count+2] = rhs(k,j2+2);
+ blockB[count+3] = rhs(k,j2+3);
}
count += nr;
}
@@ -163,12 +163,12 @@ struct ei_symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
- blockB[count+0] = alpha*ei_conj(rhs(j2+0,k));
- blockB[count+1] = alpha*ei_conj(rhs(j2+1,k));
+ blockB[count+0] = ei_conj(rhs(j2+0,k));
+ blockB[count+1] = ei_conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = alpha*ei_conj(rhs(j2+2,k));
- blockB[count+3] = alpha*ei_conj(rhs(j2+3,k));
+ blockB[count+2] = ei_conj(rhs(j2+2,k));
+ blockB[count+3] = ei_conj(rhs(j2+3,k));
}
count += nr;
}
@@ -181,13 +181,13 @@ struct ei_symm_pack_rhs
Index half = std::min(end_k,j2);
for(Index k=k2; k<half; k++)
{
- blockB[count] = alpha*ei_conj(rhs(j2,k));
+ blockB[count] = ei_conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
- blockB[count] = alpha*ei_real(rhs(j2,j2));
+ blockB[count] = ei_real(rhs(j2,j2));
count += 1;
}
else
@@ -196,7 +196,7 @@ struct ei_symm_pack_rhs
// normal
for(Index k=half+1; k<k2+rows; k++)
{
- blockB[count] = alpha*rhs(k,j2);
+ blockB[count] = rhs(k,j2);
count += 1;
}
}
@@ -253,9 +253,6 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- if (ConjugateRhs)
- alpha = ei_conj(alpha);
-
typedef ei_product_blocking_traits<Scalar,Scalar> Blocking;
Index kc = size; // cache block size along the K direction
@@ -282,7 +279,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
- pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols);
+ pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
// the select lhs's panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy
@@ -294,7 +291,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
- gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
+ gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
// the block diagonal
{
@@ -302,7 +299,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
- gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
+ gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
for(Index i2=k2+kc; i2<size; i2+=mc)
@@ -311,7 +308,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
- gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
+ gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
@@ -338,9 +335,6 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
- if (ConjugateRhs)
- alpha = ei_conj(alpha);
-
typedef ei_product_blocking_traits<Scalar,Scalar> Blocking;
Index kc = size; // cache block size along the K direction
@@ -361,7 +355,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
{
const Index actual_kc = std::min(k2+kc,size)-k2;
- pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2);
+ pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
// => GEPP
for(Index i2=0; i2<rows; i2+=mc)
@@ -369,7 +363,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
const Index actual_mc = std::min(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
- gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
+ gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}