aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2010-03-04 18:47:52 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2010-03-04 18:47:52 +0100
commit cefd9b888868bca6b23d67c0e6c69c49582508c3 (patch)
tree e7cd032e6475afcee813cbefdf01d3c5bda33d9a /Eigen/src/Core/products
parent 65eba35f98941a1d5c7ff6f854ed17224ef65b40 (diff)
parent 8ed1ef446998dc35f738ad9984cf479dbfc2cc6c (diff)
merge with default branch
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- Eigen/src/Core/products/CoeffBasedProduct.h 20
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h 4
-rw-r--r-- Eigen/src/Core/products/SelfadjointMatrixMatrix.h 26
3 files changed, 28 insertions, 22 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 3343b1875..e8016e915 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
- Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
+ res = lhs.row(row).cwiseProduct(rhs.col(col)).sum();
}
};
@@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Lhs,
- Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
+ res = lhs.cwiseProduct(rhs.col(col)).sum();
}
};
@@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
- Rhs,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
+ res = lhs.row(row).cwiseProduct(rhs).sum();
}
};
@@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Lhs,
- Rhs,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
+ res = lhs.cwiseProduct(rhs).sum();
}
};
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 6836a10de..8ac5afb05 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -31,6 +31,7 @@
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
#else
#define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T);
+// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T);
#endif
// optimized GEneral packed Block * packed Panel product kernel
@@ -146,7 +147,7 @@ struct ei_gebp_kernel
#endif
// performs "inner" product
- // TODO let's check wether the flowing peeled loop could not be
+ // TODO let's check wether the folowing peeled loop could not be
// optimized via optimal prefetching from one loop to the other
const Scalar* blB = unpackedB;
for(int k=0; k<peeled_kc; k+=4)
@@ -409,6 +410,7 @@ struct ei_gebp_kernel
CJMADD(A0,B2,C2,B2);
B2 = ei_pload(&blB[14*PacketSize]);
CJMADD(A0,B3,C3,B3);
+
A0 = ei_pload(&blA[3*PacketSize]);
B3 = ei_pload(&blB[15*PacketSize]);
CJMADD(A0,B0,C0,B0);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 785045db4..2e71b5fd4 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -43,7 +43,10 @@ struct ei_symm_pack_lhs
{
for(int w=0; w<h; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
- for(int w=h; w<BlockRows; w++)
+
+ blockA[count++] = ei_real(lhs(k,k)); // real (diagonal)
+
+ for(int w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
++h;
}
@@ -71,8 +74,11 @@ struct ei_symm_pack_lhs
// do the same with mr==1
for(int i=peeled_mc; i<rows; i++)
{
- for(int k=0; k<=i; k++)
+ for(int k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
+
+ blockA[count++] = ei_real(lhs(i, i)); // real (diagonal)
+
for(int k=i+1; k<cols; k++)
blockA[count++] = ei_conj(lhs(k, i)); // transposed
}
@@ -129,8 +135,11 @@ struct ei_symm_pack_rhs
// normal
for (int w=0 ; w<h; ++w)
blockB[count+w] = alpha*rhs(k,j2+w);
+
+ blockB[count+h] = alpha*rhs(k,k);
+
// transpose
- for (int w=h ; w<nr; ++w)
+ for (int w=h+1 ; w<nr; ++w)
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
count += nr;
++h;
@@ -175,8 +184,15 @@ struct ei_symm_pack_rhs
blockB[count] = alpha*ei_conj(rhs(j2,k));
count += 1;
}
+
+ if(half==j2)
+ {
+ blockB[count] = alpha*ei_real(rhs(j2,j2));
+ count += 1;
+ }
+
// normal
- for(int k=half; k<k2+rows; k++)
+ for(int k=half+1; k<k2+rows; k++)
{
blockB[count] = alpha*rhs(k,j2);
count += 1;
@@ -389,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_product_selfadjoint_matrix<Scalar,
- EIGEN_LOGICAL_XOR(LhsIsUpper,
+ EIGEN_LOGICAL_XOR(LhsIsUpper,
ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,