aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2012-07-29 22:26:00 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2012-07-29 22:26:00 +0200
commit8f6d5eacb4af8fc31301625652a5017e6c2e50eb (patch)
treeb3d0fd50370dcba4631ed51cb33bc6123c6e3d54 /Eigen/src/SparseLU/SparseLU_kernel_bmod.h
parentce30d50e3ed9723ed3ecd38e7c99661730c12813 (diff)
optimize LU_kernel_bmod for small cases, and add an important .noalias()
Diffstat (limited to 'Eigen/src/SparseLU/SparseLU_kernel_bmod.h')
-rw-r--r--Eigen/src/SparseLU/SparseLU_kernel_bmod.h123
1 files changed, 77 insertions, 46 deletions
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index d5df70fd2..0d4b20f59 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -39,54 +39,85 @@
* \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
* \return 0 on success
*/
-template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
-int LU_kernel_bmod(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
+template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
- typedef typename ScalarVector::Scalar Scalar;
- // First, copy U[*,j] segment from dense(*) to tempv(*)
- // The result of triangular solve is in tempv[*];
- // The result of matric-vector update is in dense[*]
- int isub = lptr + no_zeros;
- int i, irow;
- for (i = 0; i < segsize; i++)
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+ EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
{
- irow = lsub(isub);
- tempv(i) = dense(irow);
- ++isub;
+ typedef typename ScalarVector::Scalar Scalar;
+ // First, copy U[*,j] segment from dense(*) to tempv(*)
+ // The result of triangular solve is in tempv[*];
+ // The result of matric-vector update is in dense[*]
+ int isub = lptr + no_zeros;
+ int i, irow;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub);
+ tempv(i) = dense(irow);
+ ++isub;
+ }
+ // Dense triangular solve -- start effective triangle
+ luptr += nsupr * no_zeros + no_zeros;
+ // Form Eigen matrix and vector
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) );
+ Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
+
+ u = A.template triangularView<UnitLower>().solve(u);
+
+ // Dense matrix-vector product y <-- B*x
+ luptr += segsize;
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) );
+ Map<Matrix<Scalar,Dynamic,1> > l(tempv.data()+segsize, nrow);
+ if(SegSizeAtCompileTime==2)
+ l = u(0) * B.col(0) + u(1) * B.col(1);
+ else if(SegSizeAtCompileTime==3)
+ l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2);
+ else
+ l.noalias() = B * u;
+
+ // Scatter tempv[] into SPA dense[] as a temporary storage
+ isub = lptr + no_zeros;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) = tempv(i);
+ }
+
+ // Scatter l into SPA dense[]
+ for (i = 0; i < nrow; i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) -= l(i);
+ }
}
- // Dense triangular solve -- start effective triangle
- luptr += nsupr * no_zeros + no_zeros;
- // Form Eigen matrix and vector
- Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) );
- VectorBlock<ScalarVector> u(tempv, 0, segsize);
-
- u = A.template triangularView<UnitLower>().solve(u);
-
- // Dense matrix-vector product y <-- A*x
- luptr += segsize;
- new (&A) Map<Matrix<Scalar,Dynamic, Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) );
- VectorBlock<ScalarVector> l(tempv, segsize, nrow);
- l= A * u;
-
- // Scatter tempv[] into SPA dense[] as a temporary storage
- isub = lptr + no_zeros;
- for (i = 0; i < segsize; i++)
- {
- irow = lsub(isub);
- dense(irow) = tempv(i);
- tempv(i) = Scalar(0.0);
- ++isub;
- }
-
- // Scatter l into SPA dense[]
- for (i = 0; i < nrow; i++)
+};
+
+template <> struct LU_kernel_bmod<1>
+{
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+ EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
{
- irow = lsub(isub);
- dense(irow) -= l(i);
- l(i) = Scalar(0.0);
- ++isub;
+ typedef typename ScalarVector::Scalar Scalar;
+ Scalar f = dense(lsub(lptr + no_zeros));
+ luptr += nsupr * no_zeros + no_zeros + 1;
+ const Scalar* a(lusup.data() + luptr);
+ const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1);
+ int i = 0;
+ for (; i+1 < nrow; i+=2)
+ {
+ int i0 = *(irow++);
+ int i1 = *(irow++);
+ Scalar a0 = *(a++);
+ Scalar a1 = *(a++);
+ Scalar d0 = dense.coeff(i0);
+ Scalar d1 = dense.coeff(i1);
+ d0 -= f*a0;
+ d1 -= f*a1;
+ dense.coeffRef(i0) = d0;
+ dense.coeffRef(i1) = d1;
+ }
+ if(i<nrow)
+ dense.coeffRef(*(irow++)) -= f * *(a++);
}
-
- return 0;
-}
-#endif \ No newline at end of file
+};
+#endif