diff options
author | 2010-07-06 19:10:24 +0200 | |
---|---|---|
committer | 2010-07-06 19:10:24 +0200 | |
commit | d6454788d960180e49aa84047ebb3aa75013032d (patch) | |
tree | 8ec6c6fcd49573739e53a8563d59bc20d8cdf03e /Eigen/src/Core/products/SelfadjointProduct.h | |
parent | 291fef576095a84cbca1a9559089b296ca284aea (diff) |
add support for vectorized conjugated products
Diffstat (limited to 'Eigen/src/Core/products/SelfadjointProduct.h')
-rw-r--r-- | Eigen/src/Core/products/SelfadjointProduct.h | 15 |
1 file changed, 9 insertions, 6 deletions
```diff
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index eaf634de3..40c0c9aac 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -32,7 +32,7 @@
 **********************************************************************/
 
 // forward declarations (defined at the end of this file)
-template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
+template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
 struct ei_sybb_kernel;
 
 /* Optimized selfadjoint product (_SYRK) */
@@ -84,12 +84,15 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
     Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
 
     // note that the actual rhs is the transpose/adjoint of mat
-    typedef ei_conj_helper<NumTraits<Scalar>::IsComplex && !AAT, NumTraits<Scalar>::IsComplex && AAT> Conj;
+    enum {
+      ConjLhs = NumTraits<Scalar>::IsComplex && !AAT,
+      ConjRhs = NumTraits<Scalar>::IsComplex && AAT
+    };
 
-    ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj> gebp_kernel;
+    ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ConjLhs, ConjRhs> gebp_kernel;
     ei_gemm_pack_rhs<Scalar, Index, Blocking::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
     ei_gemm_pack_lhs<Scalar, Index, Blocking::mr,MatStorageOrder, false> pack_lhs;
-    ei_sybb_kernel<Scalar, Index, Blocking::mr, Blocking::nr, Conj, UpLo> sybb;
+    ei_sybb_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ConjLhs, ConjRhs, UpLo> sybb;
 
     for(Index k2=0; k2<depth; k2+=kc)
     {
@@ -163,7 +166,7 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
 // while the selfadjoint block overlapping the diagonal is evaluated into a
 // small temporary buffer which is then accumulated into the result using a
 // triangular traversal.
-template<typename Scalar, typename Index, int mr, int nr, typename Conj, int UpLo>
+template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
 struct ei_sybb_kernel
 {
   enum {
@@ -172,7 +175,7 @@ struct ei_sybb_kernel
   };
   void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar* workspace)
   {
-    ei_gebp_kernel<Scalar, Index, mr, nr, Conj> gebp_kernel;
+    ei_gebp_kernel<Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
     Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer;
 
     // let's process the block per panel of actual_mc x BlockSize,
```