From 691e607d8578a43ef238fee50b4d8cd2c0c0f15e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 23 Jan 2013 23:56:57 +0100 Subject: Specialize GEBP traits and kernel for mpreal to by-pass mpreal and remove the costly creation of many temporaries. --- unsupported/Eigen/MPRealSupport | 70 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) (limited to 'unsupported/Eigen/MPRealSupport') diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport index dfef0fe4e..8e699210f 100644 --- a/unsupported/Eigen/MPRealSupport +++ b/unsupported/Eigen/MPRealSupport @@ -12,8 +12,8 @@ #ifndef EIGEN_MPREALSUPPORT_MODULE_H #define EIGEN_MPREALSUPPORT_MODULE_H -#include #include +#include namespace Eigen { @@ -131,6 +131,74 @@ int main() template<> inline int cast(const mpfr::mpreal& x) { return int(x.toLong()); } + // Specialize GEBP kernel and traits for mpreal (no need for peeling, nor complicated stuff) + // This also permits to directly call mpfr's routines and avoid many temporaries produced by mpreal + template<> + class gebp_traits + { + public: + typedef mpfr::mpreal ResScalar; + enum { + nr = 2, // must be 2 for proper packing... + mr = 1, + WorkSpaceFactor = nr, + LhsProgress = 1, + RhsProgress = 1 + }; + }; + + template + struct gebp_kernel + { + typedef mpfr::mpreal mpreal; + + EIGEN_DONT_INLINE + void operator()(mpreal* res, Index resStride, const mpreal* blockA, const mpreal* blockB, Index rows, Index depth, Index cols, mpreal alpha, + Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, mpreal* /*unpackedB*/ = 0) + { + mpreal acc1, acc2, tmp; + + if(strideA==-1) strideA = depth; + if(strideB==-1) strideB = depth; + + for(Index j=0; j)(nr,cols-j); + mpreal *C1 = res + j*resStride; + mpreal *C2 = res + (j+1)*resStride; + for(Index i=0; i(blockB) + j*strideB + offsetB*actual_nr; + mpreal *A = const_cast(blockA) + i*strideA + offsetA; + acc1 = 0; + acc2 = 0; + for(Index k=0; k