aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix.h68
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector_MKL.h8
-rw-r--r--blas/level3_impl.h12
-rw-r--r--test/nomalloc.cpp2
4 files changed, 54 insertions, 36 deletions
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 06053bfd9..c41d997d8 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -76,7 +76,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -86,7 +86,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor>
- ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
+ ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
}
};
@@ -111,7 +111,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -122,15 +122,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- Index kc = depth; // cache block size along the K direction
- Index mc = rows; // cache block size along the M direction
- Index nc = cols; // cache block size along the N direction
- computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
- std::size_t sizeB = sizeW + kc*cols;
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
- ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
- Scalar* blockB = allocatedBlockB + sizeW;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
@@ -188,7 +189,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
- actualPanelWidth, actual_kc, 0, blockBOffset);
+ actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
// GEBP with remaining micro panel
if (lengthTarget>0)
@@ -198,7 +199,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
- actualPanelWidth, actual_kc, 0, blockBOffset);
+ actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
}
}
}
@@ -212,7 +213,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
- gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
+ gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
}
}
}
@@ -239,7 +240,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -250,16 +251,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- Index kc = depth; // cache block size along the K direction
- Index mc = rows; // cache block size along the M direction
- Index nc = cols; // cache block size along the N direction
- computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
- std::size_t sizeB = sizeW + kc*cols;
- ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
- ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
- Scalar* blockB = allocatedBlockB + sizeW;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
@@ -347,13 +348,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
alpha,
actual_kc, actual_kc, // strides
blockOffset, blockOffset,// offsets
- allocatedBlockB); // workspace
+ blockW); // workspace
}
}
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
blockA, geb, actual_mc, actual_kc, rs,
alpha,
- -1, -1, 0, 0, allocatedBlockB);
+ -1, -1, 0, 0, blockW);
}
}
}
@@ -386,17 +387,28 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
+
+ enum { IsLower = (Mode&Lower) == Lower };
+ Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
+ Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
+ Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
+ : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
+
+ BlockingType blocking(stripedRows, stripedCols, stripedDepth);
+
internal::product_triangular_matrix_matrix<Scalar, Index,
Mode, LhsIsTriangular,
(internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
(internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(
- lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes
+ stripedRows, stripedCols, stripedDepth, // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
- &dst.coeffRef(0,0), dst.outerStride(), // result info
- actualAlpha // alpha
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
+ actualAlpha, blocking
);
}
};
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3c2c3049a..3589b8c5e 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -82,11 +82,11 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
- _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
@@ -167,11 +167,11 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
- _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index 4f4f39080..0a6dbc383 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -167,7 +167,7 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
{
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
- typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar);
+ typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32];
static bool init = false;
if(!init)
@@ -236,9 +236,15 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
matrix(b,*m,*n,*ldb).setZero();
if(SIDE(*side)==LEFT)
- func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha);
+ {
+ internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
+ func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);
+ }
else
- func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha);
+ {
+ internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
+ func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking);
+ }
return 1;
}
diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp
index 1feeff4bc..f7c2ed229 100644
--- a/test/nomalloc.cpp
+++ b/test/nomalloc.cpp
@@ -165,7 +165,7 @@ void ctms_decompositions()
X = hQR.solve(B);
x = hQR.solve(b);
Eigen::ColPivHouseholderQR<Matrix> cpQR; cpQR.compute(A);
- // FIXME X = cpQR.solve(B);
+ X = cpQR.solve(B);
x = cpQR.solve(b);
Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
// FIXME X = fpQR.solve(B);