diff options
author | karturov <karturov@KARTUROV-MOBL.ccr.corp.intel.com> | 2011-12-05 14:52:21 +0700 |
---|---|---|
committer | karturov <karturov@KARTUROV-MOBL.ccr.corp.intel.com> | 2011-12-05 14:52:21 +0700 |
commit | 015c331252a3b99c187b5607572f1cec531a4d1e (patch) | |
tree | e30a3f64a950edd21ae89f667cb2f859b6479c02 /Eigen/src/Core | |
parent | e270a5656aaafb055702a51a63541e05eabd8936 (diff) |
Intel(R) MKL support added.
* * *
License disclaimer changed to BSD license for MKL_support.h
* * *
Pardiso support fixed, test added.
blas/lapack tests fixed: Scalar parameter was added in Cholesky, product_matrix_vector_triangular remaned to triangular_matrix_vector_product.
* * *
PARDISO test was added physically.
Diffstat (limited to 'Eigen/src/Core')
18 files changed, 1845 insertions, 33 deletions
diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h new file mode 100644 index 000000000..141015f1b --- /dev/null +++ b/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,219 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +template<typename Op> struct vml_call +{ enum { IsSupported = 0 }; }; + +template<typename Dst, typename Src, typename UnaryOp> +class vml_assign_traits +{ + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess + && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, + MayEnableVml = MightEnableVml && LargeEnough, + MayLinearize = MayEnableVml && MightLinearize + }; + public: + enum { + Traversal = MayLinearize ? LinearVectorizedTraversal + : MayEnableVml ? InnerVectorizedTraversal + : DefaultTraversal + }; +}; + +template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling, + int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal > +struct vml_assign_impl + : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn> +{ +}; + +template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> +struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal> +{ + typedef typename Derived1::Scalar Scalar; + typedef typename Derived1::Index Index; + inline static void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) { + const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : + &(src.nestedExpression().coeffRef(0, outer)); + Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); + vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr ); + } + } +}; + +template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> +struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal> +{ + inline static void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); + vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); + } +}; + +// Macroses + +#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ + template<typename Derived1, typename Derived2, typename UnaryOp> \ + struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \ + inline static void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \ + vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \ + } \ + }; + +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) + + +#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_MKL_VML_MODE VML_HA +#else +#define EIGEN_MKL_VML_MODE VML_LA +#endif + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + EIGENTYPE exponent = func.m_exponent; \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ + (VMLTYPE*)dst, &vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) +//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) + +} // end namespace internal + +#endif // EIGEN_ASSIGN_VML_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 5043b64fe..0ed65f9b4 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -42,14 +42,14 @@ struct tribb_kernel; template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder, int UpLo> + int ResStorageOrder, int UpLo, int Version = Specialized> struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo> -{ + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> +{ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) @@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, }; template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h new file mode 100644 index 000000000..d86bc0542 --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h @@ -0,0 +1,144 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Level 3 BLAS SYRK/HERK implementation. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo> +struct general_matrix_matrix_rankupdate : + general_matrix_matrix_triangular_product< + Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ +template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs, int UpLo> \ +struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \ + Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ + { \ + if (lhs==rhs) { \ + general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } else { \ + general_matrix_matrix_triangular_product<Index, \ + Scalar, LhsStorageOrder, ConjugateLhs, \ + Scalar, RhsStorageOrder, ConjugateRhs, \ + ColMajor, UpLo, BuiltIn> \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } \ + } \ +}; + +EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) +EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) + +// SYRK for float/double +#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ +template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ +struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ + MKLTYPE alpha_, beta_; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ + } \ +}; + +// HERK for complex data +#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ +template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ +struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ + RTYPE alpha_, beta_; \ + const EIGTYPE* a_ptr; \ +\ +/* Set alpha_ & beta_ */ \ +/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\ +/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \ + alpha_ = alpha.real(); \ + beta_ = 1.0; \ +/* Copy with conjugation in some cases*/ \ + MatrixType a; \ + if (conjA) { \ + Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ + a = mapA.conjugate(); \ + lda = a.outerStride(); \ + a_ptr = a.data(); \ + } else a_ptr=lhs; \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + + +EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) +EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) + +//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) +//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) + + +} // end namespace internal + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h new file mode 100644 index 000000000..9ad589e66 --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-matrix product functionality based on ?GEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements general matrix-matrix multiplication using BLAS +* gemm function via partial specialization of +* general_matrix_matrix_product::run(..) method for float, double, +* std::complex<float> and std::complex<double> types +**********************************************************************/ + +// gemm specialization + +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ +template< \ + typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ +static void run(Index rows, Index cols, Index depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, \ + level3_blocking<EIGTYPE, EIGTYPE>& blocking, \ + GemmParallelInfo<Index>* info = 0) \ +{ \ + using std::conj; \ +\ + char transa, transb; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX a_tmp, b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ + transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ + k = (MKL_INT)depth; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ +\ + if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +}}; + +GEMM_SPECIALIZATION(double, d, double, d) +GEMM_SPECIALIZATION(float, f, float, s) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) + +} //end of namespase + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e0e2cbf8f..1481c138d 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -40,8 +40,8 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul */ -template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version> +struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; @@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run( * - alpha is always a complex (or converted to a complex) * - no vectorization */ -template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version> +struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h new file mode 100644 index 000000000..20194c9fa --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -0,0 +1,129 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-vector product functionality based on ?GEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H +#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements general matrix-vector multiplication using BLAS +* gemv function via partial specialization of +* general_matrix_vector_product::run(..) method for float, double, +* std::complex<float> and std::complex<double> types +**********************************************************************/ + +// gemv specialization + +template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> +struct general_matrix_vector_product_gemv : + general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {}; + +#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ +template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + if (ConjugateLhs) { \ + general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } else { \ + general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } \ +} \ +}; \ +template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ +} \ +}; \ + +EIGEN_MKL_GEMV_SPECIALIZE(double) +EIGEN_MKL_GEMV_SPECIALIZE(float) +EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) +EIGEN_MKL_GEMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ +template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ +{ \ +typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsIncr, \ + EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ +{ \ + MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \ + if (LhsStorageOrder==RowMajor) { \ + m=cols; \ + n=rows; \ + }\ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + GEMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=rhs; \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) +EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) +EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) +EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) + +} //end of namespase + +#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index ecdedc363..d002f6d2e 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -85,7 +85,7 @@ template<typename Index> struct GemmParallelInfo template<bool Condition, typename Functor, typename Index> void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) { -#ifndef EIGEN_HAS_OPENMP +#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_MKL) // FIXME the transpose variable is only needed to properly split // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices. This whole diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h new file mode 100644 index 000000000..338a57a55 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h @@ -0,0 +1,293 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + + +/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ + a = _lhs; \ +\ + if (RhsStorageOrder==RowMajor) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ + Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ + b = _rhs; } \ + else { \ + if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + } else \ + if (ConjugateRhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + } else { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + +EIGEN_MKL_SYMM_L(double, double, d, d) +EIGEN_MKL_SYMM_L(float, float, f, s) +EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) + + +/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ + a = _rhs; \ +\ + if (LhsStorageOrder==RowMajor) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = lhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _lhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ + Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ + a_tmp = rhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _rhs; \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ + b = _lhs; } \ + else { \ + if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ + b_tmp = lhs.conjugate(); \ + } else \ + if (ConjugateLhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.adjoint(); \ + } else { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + +EIGEN_MKL_SYMM_R(double, double, d, d) +EIGEN_MKL_SYMM_R(float, float, f, s) +EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index d6121fc07..14419e168 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -32,8 +32,15 @@ namespace internal { * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ -template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> -static EIGEN_DONT_INLINE void product_selfadjoint_vector( + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized> +struct selfadjoint_matrix_vector_product; + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> +struct selfadjoint_matrix_vector_product + +{ +static EIGEN_DONT_INLINE void run( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, @@ -159,6 +166,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( res[j] += alpha * t2; } } +}; } // end namespace internal @@ -232,7 +240,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> } - internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> + internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h new file mode 100644 index 000000000..37d20d581 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -0,0 +1,112 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements selfadjoint matrix-vector multiplication using BLAS +**********************************************************************/ + +// symv/hemv specialization + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> +struct selfadjoint_matrix_vector_product_symv : + selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {}; + +#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ +template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ +struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index size, const Scalar* lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + enum {\ + IsColMajor = StorageOrder==ColMajor \ + }; \ + if (IsColMajor == ConjugateLhs) {\ + selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + } else {\ + selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + }\ + } \ +}; \ + +EIGEN_MKL_SYMV_SPECIALIZE(double) +EIGEN_MKL_SYMV_SPECIALIZE(float) +EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) +EIGEN_MKL_SYMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ +template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ +struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \ +{ \ +typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ +Index size, const EIGTYPE* lhs, Index lhsStride, \ +const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +{ \ + enum {\ + IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ + IsLower = UpLo == Lower ? 1 : 0, \ + }; \ + MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + SYMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=_rhs; \ + MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) + +} //end of namespase + +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 3a4523fa4..1def96719 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -110,7 +110,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; - + internal::general_matrix_matrix_triangular_product<Index, Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index e31727eb2..7632ba85a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -58,16 +58,16 @@ template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder> + int ResStorageOrder, int Version = Specialized> struct product_triangular_matrix_matrix; template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,RowMajor> + RhsStorageOrder,ConjugateRhs,RowMajor,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, @@ -91,10 +91,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, // implements col-major += alpha * op(triangular) * op(general) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor> + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits<Scalar,Scalar> Traits; @@ -220,10 +220,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, // implements col-major += alpha * op(general) * op(triangular) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor> + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits<Scalar,Scalar> Traits; enum { diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h new file mode 100644 index 000000000..5c0a07739 --- /dev/null +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -0,0 +1,307 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + + +template <typename Scalar, typename Index, + int Mode, bool LhsIsTriangular, + int LhsStorageOrder, bool ConjugateLhs, + int RhsStorageOrder, bool ConjugateRhs, + int ResStorageOrder> +struct product_triangular_matrix_matrix_trmm : + product_triangular_matrix_matrix<Scalar,Index,Mode, + LhsIsTriangular,LhsStorageOrder,ConjugateLhs, + RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \ + inline static void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \ + product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \ + LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \ + RhsStorageOrder, ConjugateRhs, ColMajor>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + } \ +}; + +EIGEN_MKL_TRMM_SPECIALIZE(double, true) +EIGEN_MKL_TRMM_SPECIALIZE(double, false) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) +EIGEN_MKL_TRMM_SPECIALIZE(float, true) +EIGEN_MKL_TRMM_SPECIALIZE(float, false) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) + +// implements col-major += alpha * op(triangular) * op(general) +#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ + LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_rows,_depth); \ + Index rows = IsLower ? _rows : diagSize; \ + Index depth = IsLower ? diagSize : _depth; \ + Index cols = _cols; \ +\ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (rows != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ + rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ +\ + /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'L', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)diagSize; \ + n = (MKL_INT)cols; \ +\ +/* Set trans */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _lhs; \ + lda = lhsStride; \ + } \ + /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_L(double, double, d, d) +EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_L(float, float, f, s) +EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) + +// implements col-major += alpha * op(general) * op(triangular) +#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ + LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_cols,_depth); \ + Index rows = _rows; \ + Index depth = IsLower ? _depth : diagSize; \ + Index cols = IsLower ? diagSize : _cols; \ +\ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (cols != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ + rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \ +\ + /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'R', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)diagSize; \ +\ +/* Set trans */ \ + transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _rhs; \ + lda = rhsStride; \ + } \ + /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_R(double, double, d, d) +EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_R(float, float, f, s) +EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 7bfff504c..ebda994f6 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -27,11 +27,11 @@ namespace internal { -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder> -struct product_triangular_matrix_vector; +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized> +struct triangular_matrix_vector_product; -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs> -struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor> +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version> +struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { @@ -75,7 +75,7 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C if (r>0) { Index s = IsLower ? pi+actualPanelWidth : 0; - general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run( + general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run( r, actualPanelWidth, &lhs.coeffRef(s,pi), lhsStride, &rhs.coeffRef(pi), rhsIncr, @@ -93,8 +93,8 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C } }; -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs> -struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor> +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version> +struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { @@ -138,7 +138,7 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C if (r>0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run( + general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run( actualPanelWidth, r, &lhs.coeffRef(pi,s), lhsStride, &rhs.coeffRef(s), rhsIncr, @@ -271,7 +271,7 @@ template<> struct trmv_selector<ColMajor> MappedDest(actualDestPtr, dest.size()) = dest; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product <Index,Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar, RhsBlasTraits::NeedToConjugate, @@ -331,7 +331,7 @@ template<> struct trmv_selector<RowMajor> Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product <Index,Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar, RhsBlasTraits::NeedToConjugate, diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h new file mode 100644 index 000000000..6acd43c88 --- /dev/null +++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h @@ -0,0 +1,247 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix-vector product functionality based on ?TRMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +/********************************************************************** +* This file implements triangular matrix-vector multiplication using BLAS +**********************************************************************/ + +// trmv/hemv specialization + +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder> +struct triangular_matrix_vector_product_trmv : + triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {}; + +#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; + +EIGEN_MKL_TRMV_SPECIALIZE(double) +EIGEN_MKL_TRMV_SPECIALIZE(float) +EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) +EIGEN_MKL_TRMV_SPECIALIZE(scomplex) + +// implements col-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (ConjLhs || IsZeroDiag) { \ + triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, k, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = 'N'; \ + uplo = IsLower ? 'L' : 'U'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + std::cout << "TRMV: CM\n";\ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size<rows) { \ + y = _res + size*resIncr; \ + a = _lhs + size; \ + m = rows-size; \ + n = size; \ + } \ + if (size<cols) { \ + x += size; \ + y = _res; \ + a = _lhs + size*lda; \ + m = size; \ + n = cols-size; \ + } \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + } \ + } \ +}; + +EIGEN_MKL_TRMV_CM(double, double, d, d) +EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMV_CM(float, float, f, s) +EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c) + +// implements row-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (IsZeroDiag) { \ + triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, k, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = ConjLhs ? 'C' : 'T'; \ + uplo = IsLower ? 'U' : 'L'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + std::cout << "TRMV: RM\n";\ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size<rows) { \ + y = _res + size*resIncr; \ + a = _lhs + size*lda; \ + m = rows-size; \ + n = size; \ + } \ + if (size<cols) { \ + x += size; \ + y = _res; \ + a = _lhs + size; \ + m = size; \ + n = cols-size; \ + } \ + MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + } \ + } \ +}; + +EIGEN_MKL_TRMV_RM(double, double, d, d) +EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMV_RM(float, float, f, s) +EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c) + +} //end of namespase + +#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h new file mode 100644 index 000000000..644ef35c5 --- /dev/null +++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h @@ -0,0 +1,153 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" + +namespace internal { + +// implements LeftSide op(triangular)^-1 * general +#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride) \ + { \ + MKL_INT m = size, n = otherSize, lda, ldb; \ + char side = 'L', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \ + Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + } \ +}; + +EIGEN_MKL_TRSM_L(double, double, d) +EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_L(float, float, s) +EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) + + +// implements RightSide general * op(triangular)^-1 +#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride) \ + { \ + MKL_INT m = otherSize, n = size, lda, ldb; \ + char side = 'R', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \ + Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + /*std::cout << "TRMS_L specialization!\n";*/ \ + } \ +}; + +EIGEN_MKL_TRSM_R(double, double, d) +EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_R(float, float, s) +EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) + + +} // end namespace internal + +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index f1d93d2f8..860d72d91 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -47,7 +47,7 @@ template< int ResStorageOrder> struct general_matrix_matrix_product; -template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> +template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized> struct general_matrix_vector_product; diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h new file mode 100644 index 000000000..6d78e97b1 --- /dev/null +++ b/Eigen/src/Core/util/MKL_support.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Include file with common MKL declarations + ******************************************************************************** +*/ + +#ifndef EIGEN_MKL_SUPPORT_H +#define EIGEN_MKL_SUPPORT_H + +#include <mkl.h> +#include <mkl_lapacke.h> +#include <iostream> + +#define EIGEN_MKL_VML_THRESHOLD 128 + +typedef std::complex<double> dcomplex; +typedef std::complex<float> scomplex; + +namespace internal { + +template<typename MKLType, typename EigenType> +static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { + mklScalar=eigenScalar; +} + +template <> +inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=eigenScalar.imag(); +} + +template <> +inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=eigenScalar.imag(); +} + +template<typename MKLType, typename EigenType> +static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { + mklScalar=eigenScalar; +} + +template <> +inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=-eigenScalar.imag(); +} + +template <> +inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { + mklScalar.real=eigenScalar.real(); + mklScalar.imag=-eigenScalar.imag(); +} + +} // end namespace internal + + +#endif
\ No newline at end of file |