about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/products/TriangularSolverMatrix.h
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2010-02-23 13:06:49 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2010-02-23 13:06:49 +0100
commit eb905500b6c654860aa9f9d9c77c7c2614e0ad10 (patch)
tree 73d13d1389ffb7594777e26a52823f6c45a48eec /Eigen/src/Core/products/TriangularSolverMatrix.h
parent d579d4cc37693823d03fbfedd2e48c40dcaf8938 (diff)
significant speedup in the matrix-matrix products
Diffstat (limited to 'Eigen/src/Core/products/TriangularSolverMatrix.h')
-rw-r--r-- Eigen/src/Core/products/TriangularSolverMatrix.h | 22
1 files changed, 13 insertions, 9 deletions
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 23a645d7c..e32a9929c 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -67,7 +67,9 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
int mc = std::min<int>(Blocking::Max_mc,size); // cache block size along the M direction
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
- Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*Blocking::PacketSize);
+ std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
+ Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+ Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
ei_conj_if<Conjugate> conj;
ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<Conjugate,false> > gebp_kernel;
@@ -146,7 +148,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols,
- actualPanelWidth, actual_kc, 0, blockBOffset*Blocking::PacketSize);
+ actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
}
@@ -169,7 +171,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheLeft,Mode,Conjugate,TriStorageOrde
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
- ei_aligned_stack_delete(Scalar, blockB, kc*cols*Blocking::PacketSize);
+ ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};
@@ -198,7 +200,9 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
int mc = std::min<int>(Blocking::Max_mc,size); // cache block size along the M direction
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
- Scalar* blockB = ei_aligned_stack_new(Scalar, kc*size*Blocking::PacketSize);
+ std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
+ Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+ Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
ei_conj_if<Conjugate> conj;
ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<false,Conjugate> > gebp_kernel;
@@ -215,7 +219,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
int startPanel = IsLower ? 0 : k2+actual_kc;
int rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
- Scalar* geb = blockB+actual_kc*actual_kc*Blocking::PacketSize;
+ Scalar* geb = blockB+actual_kc*actual_kc;
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, -1, actual_kc, rs);
@@ -230,7 +234,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
int panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
if (panelLength>0)
- pack_rhs_panel(blockB+j2*actual_kc*Blocking::PacketSize,
+ pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), triStride, -1,
panelLength, actualPanelWidth,
actual_kc, panelOffset);
@@ -260,10 +264,10 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
if(panelLength>0)
{
gebp_kernel(&lhs(i2,absolute_j2), otherStride,
- blockA, blockB+j2*actual_kc*Blocking::PacketSize,
+ blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
actual_kc, actual_kc, // strides
- panelOffset, panelOffset*Blocking::PacketSize); // offsets
+ panelOffset, panelOffset); // offsets
}
// unblocked triangular solve
@@ -298,7 +302,7 @@ struct ei_triangular_solve_matrix<Scalar,OnTheRight,Mode,Conjugate,TriStorageOrd
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
- ei_aligned_stack_delete(Scalar, blockB, kc*size*Blocking::PacketSize);
+ ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
}
};