From a156f5a8696a22a2738ab54bcc77a7565d5e9019 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 31 Jul 2009 13:18:19 +0200 Subject: faster trsm kernel and fix a couple of issues --- Eigen/src/Core/SolveTriangular.h | 4 +- Eigen/src/Core/products/TriangularSolverMatrix.h | 54 +++++++++--------------- 2 files changed, 22 insertions(+), 36 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index f60ef1c03..9b67dd580 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -49,7 +49,7 @@ struct ei_triangular_solver_selector0; @@ -224,7 +224,7 @@ void TriangularView::solveInPlace(const MatrixBase& ei_assert(!(Mode & ZeroDiagBit)); ei_assert(Mode & (UpperTriangularBit|LowerTriangularBit)); - enum { copy = ei_traits::Flags & RowMajorBit }; + enum { copy = ei_traits::Flags & RowMajorBit && RhsDerived::IsVectorAtCompileTime }; typedef typename ei_meta_if::type, RhsDerived&>::ret RhsCopy; RhsCopy rhsCopy(rhs); diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 7842f8703..e49fac956 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -25,7 +25,7 @@ #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H -// if the rhs is row major, we have to evaluate it in a temporary colmajor matrix +// if the rhs is row major, let's transpose the product template struct ei_triangular_solve_matrix { @@ -34,22 +34,16 @@ struct ei_triangular_solve_matrix + (Mode&UnitDiagBit) | ((Mode&UpperTriangular) ? LowerTriangular : UpperTriangular), + NumTraits::IsComplex && Conjugate, + TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> ::run(size, cols, tri, triStride, _other, otherStride); - -// Map > other(_other, otherStride, cols); -// Matrix aux = other.block(0,0,size,cols); -// ei_triangular_solve_matrix -// ::run(size, cols, tri, triStride, aux.data(), aux.stride()); -// other.block(0,0,size,cols) = aux; } }; -/* Optimized triangular solver with multiple right hand side (_TRSM) +/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left */ template struct ei_triangular_solve_matrix @@ -190,11 +184,8 @@ struct ei_triangular_solve_matrix rhs(_tri,triStride); -// ei_blas_data_mapper lhs(_other,otherStride); - - Map > rhs(_tri,size,size); - Map > lhs(_other,rows,size); + ei_const_blas_data_mapper rhs(_tri,triStride); + ei_blas_data_mapper lhs(_other,otherStride); typedef ei_product_blocking_traits Blocking; enum { @@ -203,8 +194,8 @@ struct ei_triangular_solve_matrix(/*Blocking::Max_kc/4*/32,size); // cache block size along the K direction - int mc = std::min(/*Blocking::Max_mc*/32,size); // cache block size along the M direction + int kc = std::min(Blocking::Max_kc/4,size); // cache block size along the K direction + int mc = std::min(Blocking::Max_mc,size); // cache block size along the M direction Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); Scalar* blockB = ei_aligned_stack_new(Scalar, kc*size*Blocking::PacketSize); @@ -214,7 +205,6 @@ struct ei_triangular_solve_matrix pack_rhs; ei_gemm_pack_rhs pack_rhs_panel; ei_gemm_pack_lhs pack_lhs_panel; - ei_gemm_pack_lhs pack_lhs; for(int k2=IsLowerTriangular ? size : 0; IsLowerTriangular ? k2>0 : k20) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, -1, actual_kc, rs); @@ -239,8 +229,6 @@ struct ei_triangular_solve_matrix0) pack_rhs_panel(blockB+j2*actual_kc*Blocking::PacketSize, &rhs(actual_k2+panelOffset, actual_j2), triStride, -1, @@ -269,7 +257,6 @@ struct ei_triangular_solve_matrix0) if(panelLength>0) { gebp_kernel(&lhs(i2,absolute_j2), otherStride, @@ -284,18 +271,17 @@ struct ei_triangular_solve_matrix0) gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb, actual_mc, actual_kc, rs); -- cgit v1.2.3