about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
diff options
context:
space:
mode:
author Gael Guennebaud <g.gael@free.fr> 2012-10-30 15:09:48 +0100
committer Gael Guennebaud <g.gael@free.fr> 2012-10-30 15:09:48 +0100
commit fea4220f3728e60bde06147ff9b89e10df203466 (patch)
tree c26978864d04c3da65f09fe2590feb6b4dce52f0 /Eigen/src/SparseLU/SparseLU_kernel_bmod.h
parent f7e203fb0c3033df26b3f75cc3812989c19041a2 (diff)
SparseLU: add a specialized gemm kernel, and add padding to the supernodes so that all supernode columns are properly aligned
Diffstat (limited to 'Eigen/src/SparseLU/SparseLU_kernel_bmod.h')
-rw-r--r-- Eigen/src/SparseLU/SparseLU_kernel_bmod.h 30
1 files changed, 16 insertions, 14 deletions
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index b15ff9c50..ca53fb6d0 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -18,7 +18,7 @@
* \param [in,out]dense Packed values of the original matrix
* \param tempv temporary vector to use for updates
* \param lusup array containing the supernodes
- * \param nsupr Number of rows in the supernode
+ * \param lda Leading dimension in the supernode
* \param nrow Number of rows in the rectangular part of the supernode
* \param lsub compressed row subscripts of supernodes
* \param lptr pointer to the first column of the current supernode in lsub
@@ -28,7 +28,7 @@
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
+ EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int lda, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
// First, copy U[*,j] segment from dense(*) to tempv(*)
@@ -43,23 +43,24 @@ template <int SegSizeAtCompileTime> struct LU_kernel_bmod
++isub;
}
// Dense triangular solve -- start effective triangle
- luptr += nsupr * no_zeros + no_zeros;
+ luptr += lda * no_zeros + no_zeros;
// Form Eigen matrix and vector
- Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) );
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
u = A.template triangularView<UnitLower>().solve(u);
// Dense matrix-vector product y <-- B*x
luptr += segsize;
- Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) );
- Map<Matrix<Scalar,Dynamic,1> > l(tempv.data()+segsize, nrow);
- if(SegSizeAtCompileTime==2)
- l = u(0) * B.col(0) + u(1) * B.col(1);
- else if(SegSizeAtCompileTime==3)
- l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2);
- else
- l.noalias() = B * u;
+ const int PacketSize = internal::packet_traits<Scalar>::size;
+ int ldl = internal::first_multiple(nrow, PacketSize);
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
+ int aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
+ int aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
+ Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
+
+ l.setZero();
+ internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
// Scatter tempv[] into SPA dense[] as a temporary storage
isub = lptr + no_zeros;
@@ -81,11 +82,12 @@ template <int SegSizeAtCompileTime> struct LU_kernel_bmod
template <> struct LU_kernel_bmod<1>
{
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
+ EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int lda, const int nrow,
+ IndexVector& lsub, const int lptr, const int no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
Scalar f = dense(lsub(lptr + no_zeros));
- luptr += nsupr * no_zeros + no_zeros + 1;
+ luptr += lda * no_zeros + no_zeros + 1;
const Scalar* a(lusup.data() + luptr);
const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1);
int i = 0;