diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-07-05 23:27:54 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-07-05 23:27:54 +0200 |
commit | c69a22619293b791b884b7aefa65b245a902ea04 (patch) | |
tree | 2935dac75ec5cd896f11d5a61e9f2373eb9a7e3e /Eigen/src | |
parent | e1eccfad3f3b4e9e2ac94ca9a43256144ef72888 (diff) |
* extend the Has* packet traits and makes all functor use it
* extend the packing routines to support conjugation
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Functors.h | 40 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 48 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/util/BlasUtil.h | 13 |
7 files changed, 85 insertions, 58 deletions
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 78d1e5628..cbe20d50e 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -46,7 +46,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_sum_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 + PacketAccess = ei_packet_traits<Scalar>::HasAdd }; }; @@ -69,7 +69,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_product_op<Scalar> > { enum { Cost = NumTraits<Scalar>::MulCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 + PacketAccess = ei_packet_traits<Scalar>::HasMul }; }; @@ -92,7 +92,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_min_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 + PacketAccess = ei_packet_traits<Scalar>::HasMin }; }; @@ -115,7 +115,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_max_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 + PacketAccess = ei_packet_traits<Scalar>::HasMax }; }; @@ -158,7 +158,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_difference_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 + PacketAccess = ei_packet_traits<Scalar>::HasSub }; }; @@ -178,10 +178,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > { enum { Cost = 2 * NumTraits<Scalar>::MulCost, - PacketAccess = ei_packet_traits<Scalar>::size>1 - #if (defined EIGEN_VECTORIZE) - && !NumTraits<Scalar>::IsInteger - #endif + PacketAccess = ei_packet_traits<Scalar>::HasDiv }; }; @@ -203,7 +200,7 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_opposite_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; + PacketAccess = ei_packet_traits<Scalar>::HasNegate }; }; /** \internal @@ -224,7 +221,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = int(ei_packet_traits<Scalar>::size)>1 + PacketAccess = ei_packet_traits<Scalar>::HasAbs }; }; @@ -243,7 +240,7 @@ template<typename Scalar> struct ei_scalar_abs2_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_abs2_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; }; /** \internal * \brief Template functor to compute the conjugate of a complex value @@ -254,14 +251,14 @@ template<typename Scalar> struct ei_scalar_conjugate_op { EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op) EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); } template<typename PacketScalar> - EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pconj(a); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> > { enum { Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, - PacketAccess = int(ei_packet_traits<Scalar>::size)>1 + PacketAccess = ei_packet_traits<Scalar>::HasConj }; }; @@ -398,7 +395,7 @@ struct ei_scalar_multiple_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_multiple_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; }; template<typename Scalar1, typename Scalar2> struct ei_scalar_multiple2_op { @@ -425,7 +422,7 @@ struct ei_scalar_quotient1_impl { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; }; template<typename Scalar> struct ei_scalar_quotient1_impl<Scalar,true> { @@ -472,6 +469,7 @@ struct ei_scalar_constant_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_constant_op<Scalar> > +// FIXME replace this packet test by a safe one { enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; }; template<typename Scalar> struct ei_scalar_identity_op { @@ -543,7 +541,7 @@ struct ei_linspaced_op_impl<Scalar,true> // nested expressions). template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op; template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> > -{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; }; +{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; }; template <typename Scalar, bool RandomAccess> struct ei_linspaced_op { typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -588,7 +586,7 @@ struct ei_scalar_add_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_add_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; +{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; }; /** \internal * \brief Template functor to compute the square root of a scalar @@ -676,7 +674,7 @@ struct ei_scalar_inverse_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_inverse_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; }; /** \internal * \brief Template functor to compute the square of a scalar @@ -692,7 +690,7 @@ struct ei_scalar_square_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_square_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; }; /** \internal * \brief Template functor to compute the cube of a scalar @@ -708,7 +706,7 @@ struct ei_scalar_cube_op { }; template<typename Scalar> struct ei_functor_traits<ei_scalar_cube_op<Scalar> > -{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; +{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; }; // default functor traits for STL functors: diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 643e12e34..6cd288c55 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -58,8 +58,11 @@ struct ei_default_packet_traits HasMul = 1, HasNegate = 1, HasAbs = 1, + HasAbs2 = 1, HasMin = 1, HasMax = 1, + HasConj = 1, + HasSetLinear = 1, HasDiv = 0, HasSqrt = 0, @@ -105,6 +108,10 @@ ei_psub(const Packet& a, template<typename Packet> inline Packet ei_pnegate(const Packet& a) { return -a; } +/** \internal \returns conj(a) (coeff-wise) */ +template<typename Packet> inline Packet +ei_pconj(const Packet& a) { return ei_conj(a); } + /** \internal \returns a * b (coeff-wise) */ template<typename Packet> inline Packet ei_pmul(const Packet& a, diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index ab8bf8b84..3f7a04b7d 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -37,6 +37,18 @@ typedef __m128d Packet1cd; template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits { typedef Packet2cf type; enum {size=2}; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; }; template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; @@ -56,7 +68,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); return Packet2cf(_mm_xor_ps(a.v,mask)); - +} +template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) +{ + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_xor_ps(a.v,mask)); } template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 29375bdae..8a3cdf679 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -62,6 +62,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits { typedef Packet4f type; enum {size=4}; enum { + HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasLog = 1, @@ -70,7 +71,12 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits }; }; template<> struct ei_packet_traits<double> : ei_default_packet_traits -{ typedef Packet2d type; enum {size=2}; }; +{ + typedef Packet2d type; enum {size=2}; + enum { + HasDiv = 1 + }; +}; template<> struct ei_packet_traits<int> : ei_default_packet_traits { typedef Packet4i type; enum {size=4}; }; diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 4d9a09708..4a2ebb713 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -773,8 +773,8 @@ struct ei_gemm_pack_lhs // 4 5 6 7 16 17 18 19 25 28 // 8 9 10 11 20 21 22 23 26 29 // . . . . . . . . . . -template<typename Scalar, typename Index, int nr, bool PanelMode> -struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> +template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> +struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode> { typedef typename ei_packet_traits<Scalar>::type Packet; enum { PacketSize = ei_packet_traits<Scalar>::size }; @@ -782,6 +782,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; @@ -796,19 +797,19 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> if (hasAlpha) for(Index k=0; k<depth; k++) { - blockB[count+0] = alpha*b0[k]; - blockB[count+1] = alpha*b1[k]; - if(nr==4) blockB[count+2] = alpha*b2[k]; - if(nr==4) blockB[count+3] = alpha*b3[k]; + blockB[count+0] = alpha*cj(b0[k]); + blockB[count+1] = alpha*cj(b1[k]); + if(nr==4) blockB[count+2] = alpha*cj(b2[k]); + if(nr==4) blockB[count+3] = alpha*cj(b3[k]); count += nr; } else for(Index k=0; k<depth; k++) { - blockB[count+0] = b0[k]; - blockB[count+1] = b1[k]; - if(nr==4) blockB[count+2] = b2[k]; - if(nr==4) blockB[count+3] = b3[k]; + blockB[count+0] = cj(b0[k]); + blockB[count+1] = cj(b1[k]); + if(nr==4) blockB[count+2] = cj(b2[k]); + if(nr==4) blockB[count+3] = cj(b3[k]); count += nr; } // skip what we have after @@ -823,13 +824,13 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> if (hasAlpha) for(Index k=0; k<depth; k++) { - blockB[count] = alpha*b0[k]; + blockB[count] = alpha*cj(b0[k]); count += 1; } else for(Index k=0; k<depth; k++) { - blockB[count] = b0[k]; + blockB[count] = cj(b0[k]); count += 1; } if(PanelMode) count += (stride-offset-depth); @@ -838,14 +839,15 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> }; // this version is optimized for row major matrices -template<typename Scalar, typename Index, int nr, bool PanelMode> -struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> +template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode> +struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> { enum { PacketSize = ei_packet_traits<Scalar>::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols, Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; @@ -858,10 +860,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> for(Index k=0; k<depth; k++) { const Scalar* b0 = &rhs[k*rhsStride + j2]; - blockB[count+0] = alpha*b0[0]; - blockB[count+1] = alpha*b0[1]; - if(nr==4) blockB[count+2] = alpha*b0[2]; - if(nr==4) blockB[count+3] = alpha*b0[3]; + blockB[count+0] = alpha*cj(b0[0]); + blockB[count+1] = alpha*cj(b0[1]); + if(nr==4) blockB[count+2] = alpha*cj(b0[2]); + if(nr==4) blockB[count+3] = alpha*cj(b0[3]); count += nr; } } @@ -870,10 +872,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> for(Index k=0; k<depth; k++) { const Scalar* b0 = &rhs[k*rhsStride + j2]; - blockB[count+0] = b0[0]; - blockB[count+1] = b0[1]; - if(nr==4) blockB[count+2] = b0[2]; - if(nr==4) blockB[count+3] = b0[3]; + blockB[count+0] = cj(b0[0]); + blockB[count+1] = cj(b0[1]); + if(nr==4) blockB[count+2] = cj(b0[2]); + if(nr==4) blockB[count+3] = cj(b0[3]); count += nr; } } @@ -887,7 +889,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> const Scalar* b0 = &rhs[j2]; for(Index k=0; k<depth; k++) { - blockB[count] = alpha*b0[k*rhsStride]; + blockB[count] = alpha*cj(b0[k*rhsStride]); count += 1; } if(PanelMode) count += stride-offset-depth; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 39b283a3f..5d63aee3d 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -73,9 +73,6 @@ static void run(Index rows, Index cols, Index depth, ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - if (ConjugateRhs) - alpha = ei_conj(alpha); - typedef typename ei_packet_traits<Scalar>::type PacketType; typedef ei_product_blocking_traits<Scalar> Blocking; @@ -83,9 +80,9 @@ static void run(Index rows, Index cols, Index depth, Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction //Index nc = blocking.nc(); // cache block size along the N direction - ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs; - ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs; - ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp; + ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder, ConjugateLhs> pack_lhs; + ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder, ConjugateRhs> pack_rhs; + ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr> gebp; #ifdef EIGEN_HAS_OPENMP if(info) diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 139ea73d2..cd0ed0ede 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -29,10 +29,15 @@ // implement and control fast level 2 and level 3 BLAS-like routines. // forward declarations -template<typename Scalar, typename Index, int mr, int nr, typename Conj> + +// Provides scalar/packet-wise product and product with accumulation +// with optional conjugation of the arguments. +template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper; + +template<typename Scalar, typename Index, int mr, int nr, typename Conj = ei_conj_helper<false,false> > struct ei_gebp_kernel; -template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false> +template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> struct ei_gemm_pack_rhs; template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false> @@ -53,10 +58,6 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, static void ei_cache_friendly_product_rowmajor_times_vector( const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha); -// Provides scalar/packet-wise product and product with accumulation -// with optional conjugation of the arguments. -template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper; - template<> struct ei_conj_helper<false,false> { template<typename T> |