diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-02-23 13:06:49 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-02-23 13:06:49 +0100 |
commit | eb905500b6c654860aa9f9d9c77c7c2614e0ad10 (patch) | |
tree | 73d13d1389ffb7594777e26a52823f6c45a48eec /Eigen/src/Core/products/TriangularSolverMatrix.h | |
parent | d579d4cc37693823d03fbfedd2e48c40dcaf8938 (diff) |
significant speedup in the matrix-matrix products
Diffstat (limited to 'Eigen/src/Core/products/TriangularSolverMatrix.h')
-rw-r--r-- | Eigen/src/Core/products/TriangularSolverMatrix.h | 22 |
1 files changed, 13 insertions, 9 deletions
```diff
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 23a645d7c..e32a9929c 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -67,7 +67,9 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
     int mc = std::min<int>(Blocking::Max_mc,size); // cache block size along the M direction
 
     Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
-    Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*Blocking::PacketSize);
+    std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
 
     ei_conj_if<Conjugate> conj;
     ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<Conjugate,false> > gebp_kernel;
@@ -146,7 +148,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
             pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
 
             gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
-                        actualPanelWidth, actual_kc, 0, blockBOffset*Blocking::PacketSize);
+                        actualPanelWidth, actual_kc, 0, blockBOffset);
           }
         }
       }
@@ -169,7 +171,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
     }
 
     ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, blockB, kc*cols*Blocking::PacketSize);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };
 
@@ -198,7 +200,9 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
     int mc = std::min<int>(Blocking::Max_mc,size); // cache block size along the M direction
 
     Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
-    Scalar* blockB = ei_aligned_stack_new(Scalar, kc*size*Blocking::PacketSize);
+    std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
 
     ei_conj_if<Conjugate> conj;
     ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<false,Conjugate> > gebp_kernel;
@@ -215,7 +219,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
 
       int startPanel = IsLower ? 0 : k2+actual_kc;
       int rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
-      Scalar* geb = blockB+actual_kc*actual_kc*Blocking::PacketSize;
+      Scalar* geb = blockB+actual_kc*actual_kc;
 
       if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, -1, actual_kc, rs);
 
@@ -230,7 +234,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
           int panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
 
           if (panelLength>0)
-          pack_rhs_panel(blockB+j2*actual_kc*Blocking::PacketSize,
+          pack_rhs_panel(blockB+j2*actual_kc,
                          &rhs(actual_k2+panelOffset, actual_j2), triStride, -1,
                          panelLength, actualPanelWidth,
                          actual_kc, panelOffset);
@@ -260,10 +264,10 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
             if(panelLength>0)
             {
               gebp_kernel(&lhs(i2,absolute_j2), otherStride,
-                          blockA, blockB+j2*actual_kc*Blocking::PacketSize,
+                          blockA, blockB+j2*actual_kc,
                           actual_mc, panelLength, actualPanelWidth,
                           actual_kc, actual_kc, // strides
-                          panelOffset, panelOffset*Blocking::PacketSize); // offsets
+                          panelOffset, panelOffset); // offsets
             }
 
             // unblocked triangular solve
@@ -298,7 +302,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
     }
 
     ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, blockB, kc*size*Blocking::PacketSize);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };
 
```