diff options
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Product.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/SolveTriangular.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/TriangularMatrix.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 215 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/PacketMath.h | 1 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 257 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 45 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/util/BlasUtil.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 12 |
13 files changed, 553 insertions, 59 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index ca30c4dce..139132c6b 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -324,6 +324,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::LhsBlasTraits LhsBlasTraits; typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef Map<Matrix<Scalar,Dynamic,1>, Aligned> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); @@ -342,7 +343,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> else { actualDest = ei_aligned_stack_new(Scalar,dest.size()); - Map<typename Dest::PlainObject>(actualDest, dest.size()) = dest; + MappedDest(actualDest, dest.size()) = dest; } ei_cache_friendly_product_colmajor_times_vector @@ -353,7 +354,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true> if (!EvalToDest) { - dest = Map<typename Dest::PlainObject>(actualDest, dest.size()); + dest = MappedDest(actualDest, dest.size()); ei_aligned_stack_delete(Scalar, actualDest, dest.size()); } } @@ -365,6 +366,7 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true> static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) { typedef typename ProductType::Scalar Scalar; + typedef typename ProductType::Index Index; typedef typename ProductType::ActualLhsType ActualLhsType; typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::_ActualRhsType _ActualRhsType; @@ -394,9 +396,12 @@ template<> struct ei_gemv_selector<OnTheRight,RowMajor,true> } ei_cache_friendly_product_rowmajor_times_vector - <LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate>( + <LhsBlasTraits::NeedToConjugate,RhsBlasTraits::NeedToConjugate, Scalar, Index>( + actualLhs.rows(), actualLhs.cols(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), - rhs_data, prod.rhs().size(), dest, actualAlpha); + rhs_data, 1, + &dest.coeffRef(0,0), dest.innerStride(), + actualAlpha); if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size()); } diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 310e262d8..90ce2a802 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -80,12 +80,13 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,RowMajor // 2 - it is slighlty faster at runtime Index startRow = IsLower ? pi : pi-actualPanelWidth; Index startCol = IsLower ? 0 : pi; - VectorBlock<Rhs,Dynamic> target(other,startRow,actualPanelWidth); - ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>( + ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false,Scalar,Index>( + actualPanelWidth, r, &(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(), - &(other.coeffRef(startCol)), r, - target, Scalar(-1)); + &(other.coeffRef(startCol)), other.innerStride(), + &other.coeffRef(startRow), other.innerStride(), + Scalar(-1)); } for(Index k=0; k<actualPanelWidth; ++k) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index c66bbb9fa..e2afa23b0 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -90,7 +90,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> protected: - void check_coordinates(Index row, Index col) + void check_coordinates(Index row, Index col) const { EIGEN_ONLY_USED_FOR_DEBUG(row); EIGEN_ONLY_USED_FOR_DEBUG(col); @@ -102,12 +102,12 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> } #ifdef EIGEN_INTERNAL_DEBUGGING - void check_coordinates_internal(Index row, Index col) + void check_coordinates_internal(Index row, Index col) const { check_coordinates(row, col); } #else - void check_coordinates_internal(Index , Index ) {} + void check_coordinates_internal(Index , Index ) const {} #endif }; diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h new file mode 100644 index 000000000..2dba95a2f --- /dev/null +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -0,0 +1,215 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_COMPLEX_ALTIVEC_H +#define EIGEN_COMPLEX_ALTIVEC_H + +static Packet4ui ei_p4ui_CONJ_XOR = vec_mergeh((Packet4ui)ei_p4i_ZERO, (Packet4ui)ei_p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static Packet16uc ei_p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; +static Packet16uc ei_p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc ei_p16uc_COMPLEX_REV = vec_sld(ei_p16uc_REVERSE, ei_p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; +static Packet16uc ei_p16uc_COMPLEX_REV2 = vec_sld(ei_p16uc_FORWARD, ei_p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +static Packet16uc ei_p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc ei_p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)ei_p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; + +//---------- float ---------- +struct Packet2cf +{ + EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} + Packet4f v; +}; + +template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits +{ + typedef Packet2cf type; + enum { + Vectorizable = 1, + size = 2, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<std::complex<float> >(const std::complex<float>& from) +{ + Packet2cf res; + /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ + if ((ptrdiff_t)&from % 16 == 0) { + res.v = ei_pload((const float *)&from); + res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_HI); + } else { + res.v = ei_ploadu((const float *)&from); + res.v = vec_perm(res.v, res.v, ei_p16uc_PSET_LO); + } + return res; +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_psub<Packet4f>(ei_p4f_ZERO, a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, ei_p4ui_CONJ_XOR)); } + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + Packet4f v1, v2; + + // Permute and multiply the real parts of a and b + v1 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_RE); + // Get the imaginary parts of a + v2 = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_IM); + // multiply a_re * b + v1 = vec_madd(v1, b.v, ei_p4f_ZERO); + // multiply a_im * b and get the conjugate result + v2 = vec_madd(v2, b.v, ei_p4f_ZERO); + v2 = (Packet4f) vec_xor((Packet4ui)v2, ei_p4ui_CONJ_XOR); + // permute back to a proper order + v2 = vec_perm(v2, v2, ei_p16uc_COMPLEX_REV); + + return Packet2cf(vec_add(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); } + +template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a) +{ + std::complex<float> EIGEN_ALIGN16 res[2]; + ei_pstore((float *)&res, a.v); + + return res[0]; +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) +{ + Packet4f rev_a; + rev_a = vec_perm(a.v, a.v, ei_p16uc_COMPLEX_REV2); + return Packet2cf(rev_a); +} + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a) +{ + Packet4f b; + b = (Packet4f) vec_sld(a.v, a.v, 8); + b = ei_padd(a.v, b); + return ei_pfirst(Packet2cf(sum)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs) +{ + Packet4f b1, b2; + + b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); + b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = (Packet4f) vec_sld(b2, b2, 8); + b2 = ei_padd(b1, b2); + + return Packet2cf(b2); +} + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a) +{ + Packet4f b; + Packet2cf prod; + b = (Packet4f) vec_sld(a.v, a.v, 8); + prod = ei_pmul(a, Packet2cf(b)); + + return ei_pfirst(prod); +} + +template<int Offset> +struct ei_palign_impl<Offset,Packet2cf> +{ + EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) + { + if (Offset==1) + { + first.v = vec_sld(first.v, second.v, 8); + } + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pmul(a, ei_pconj(b)); + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pmul(ei_pconj(a), b); + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pconj(ei_pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for AltiVec + Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); + Packet4f s = vec_madd(b.v, b.v, ei_p4f_ZERO); + return Packet2cf(ei_pdiv(res.v, vec_add(s,vec_perm(s, s, ei_p16uc_COMPLEX_REV)))); +} + +#endif // EIGEN_COMPLEX_ALTIVEC_H diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index f58da60e6..a3ceed8e8 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -74,6 +74,7 @@ typedef __vector unsigned char Packet16uc; static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 }; static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; +static Packet16uc ei_p16uc_FORWARD = vec_lvsl(0, (float*)0); static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h new file mode 100644 index 000000000..bf68a2bbb --- /dev/null +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -0,0 +1,257 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_COMPLEX_ALTIVEC_H +#define EIGEN_COMPLEX_ALTIVEC_H + +static uint32x4_t ei_p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static uint32x2_t ei_p2ui_CONJ_XOR = { 0x00000000, 0x80000000 }; + +//---------- float ---------- +struct Packet2cf +{ + EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} + Packet4f v; +}; + +template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits +{ + typedef Packet2cf type; + enum { + Vectorizable = 1, + size = 2, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<std::complex<float> >(const std::complex<float>& from) +{ + float32x2_t r64; + r64 = vld1_f32((float *)&from); + + return Packet2cf(vcombine_f32(r64, r64)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_padd<Packet4f>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_psub<Packet4f>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_pnegate<Packet4f>(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) +{ + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), ei_p4ui_CONJ_XOR))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + Packet4f v1, v2; + float32x2_t a_lo, a_hi; + + // Get the real values of a | a1_re | a1_re | a2_re | a2_re | + v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); + // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); + // Multiply the real a with b + v1 = vmulq_f32(v1, b.v); + // Multiply the imag a with b + v2 = vmulq_f32(v2, b.v); + // Conjugate v2 + v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), ei_p4ui_CONJ_XOR)); + // Swap real/imag elements in v2. + a_lo = vrev64_f32(vget_low_f32(v2)); + a_hi = vrev64_f32(vget_high_f32(v2)); + v2 = vcombine_f32(a_lo, a_hi); + // Add and return the result + return Packet2cf(vaddq_f32(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); } + +template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); } + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a) +{ + std::complex<float> EIGEN_ALIGN16 x[2]; + vst1q_f32((float *)x, a.v); + return x[0]; +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) +{ + float32x2_t a_lo, a_hi; + Packet4f a_r128; + + a_lo = vget_low_f32(a.v); + a_hi = vget_high_f32(a.v); + a_r128 = vcombine_f32(a_hi, a_lo); + + return Packet2cf(a_r128); +} + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a) +{ + float32x2_t a1, a2; + std::complex<float> s; + + a1 = vget_low_f32(a.v); + a2 = vget_high_f32(a.v); + a2 = vadd_f32(a1, a2); + vst1_f32((float *)&s, a2); + + return s; +} + +template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs) +{ + Packet4f sum1, sum2, sum; + + // Add the first two 64-bit float32x2_t of vecs[0] + sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v)); + sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v)); + sum = vaddq_f32(sum1, sum2); + + return Packet2cf(sum); +} + +template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a) +{ + float32x2_t a1, a2, v1, v2, prod; + std::complex<float> s; + + a1 = vget_low_f32(a.v); + a2 = vget_high_f32(a.v); + // Get the real values of a | a1_re | a1_re | a2_re | a2_re | + v1 = vdup_lane_f32(a1, 0); + // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + v2 = vdup_lane_f32(a1, 1); + // Multiply the real a with b + v1 = vmul_f32(v1, a2); + // Multiply the imag a with b + v2 = vmul_f32(v2, a2); + // Conjugate v2 + v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), ei_p2ui_CONJ_XOR)); + // Swap real/imag elements in v2. + v2 = vrev64_f32(v2); + // Add v1, v2 + prod = vadd_f32(v1, v2); + + vst1_f32((float *)&s, prod); + + return s; +} + +template<int Offset> +struct ei_palign_impl<Offset,Packet2cf> +{ + EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) + { + if (Offset==1) + { + first.v = vextq_f32(first.v, second.v, 2); + } + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pmul(a, ei_pconj(b)); + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pmul(ei_pconj(a), b); + } +}; + +template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true> +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return ei_padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return ei_pconj(ei_pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for AltiVec + Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); + Packet4f s, rev_s; + float32x2_t a_lo, a_hi; + + // this computes the norm + s = vmulq_f32(b.v, b.v); + a_lo = vrev64_f32(vget_low_f32(s)); + a_hi = vrev64_f32(vget_high_f32(s)); + rev_s = vcombine_f32(a_lo, a_hi); + + return Packet2cf(ei_pdiv(res.v, vaddq_f32(s,rev_s))); +} + +#endif // EIGEN_COMPLEX_ALTIVEC_H diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 97750adbe..5b0d6ab12 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -64,7 +64,8 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size = 4, - + + HasDiv = 1, // FIXME check the Has* HasSin = 0, HasCos = 0, @@ -176,8 +177,8 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index d61e82d00..9d32ede0e 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -275,21 +275,26 @@ template<> EIGEN_STRONG_INLINE Packet1cd ei_por <Packet1cd>(const Packet1cd& template<> EIGEN_STRONG_INLINE Packet1cd ei_pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd ei_pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <std::complex<double> >(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(_mm_load_pd((const double*)from)); } +// FIXME force unaligned load, this is a temporary fix +template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <std::complex<double> >(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(ei_ploadu((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ei_ploadu<std::complex<double> >(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ei_ploadu((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ei_pset1<std::complex<double> >(const std::complex<double>& from) { /* here we really have to use unaligned loads :( */ return ei_ploadu(&from); } -template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd((double*)to, from.v); } +// FIXME force unaligned store, this is a temporary fix +template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex<double> ei_pfirst<Packet1cd>(const Packet1cd& a) { - EIGEN_ALIGN16 double res[2]; - _mm_store_pd(res, a.v); - return *(std::complex<double>*)res; +// EIGEN_ALIGN16 double res[2]; +// _mm_store_pd(res, a.v); +// return *(std::complex<double>*)res; + EIGEN_ALIGN16 std::complex<double> res; + ei_pstore(&res, a); + return res; } template<> EIGEN_STRONG_INLINE Packet1cd ei_preverse(const Packet1cd& a) { return a; } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index bfbc74370..91af346ed 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -112,8 +112,12 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return ei_vec4f_swizzle1(res,0,0,0,0); } template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { +#ifdef EIGEN_VECTORIZE_SSE3 + return _mm_loaddup_pd(&from); +#else Packet2d res = _mm_set_sd(from); return ei_vec2d_swizzle1(res, 0, 0); +#endif } #else template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); } @@ -267,13 +271,13 @@ template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _m template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) +#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) && !defined(__INTEL_COMPILER) // The temporary variable fixes an internal compilation error. // Direct of the struct members fixed bug #62. template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } -#elif defined(_MSC_VER) && (_MSC_VER <= 1500) +#elif defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(__INTEL_COMPILER) // The temporary variable fixes an internal compilation error. template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e671a657e..0997746ef 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -258,13 +258,15 @@ void ei_cache_friendly_product_colmajor_times_vector( } // TODO add peeling to mask unaligned load/stores -template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType> +template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index> static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( + Index rows, Index cols, const Scalar* lhs, Index lhsStride, - const Scalar* rhs, Index rhsSize, - ResType& res, + const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha) { + ei_internal_assert(rhsIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined #endif @@ -291,22 +293,22 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( const Index peels = 2; const Index PacketAlignedMask = PacketSize-1; const Index PeelAlignedMask = PacketSize*peels-1; - const Index size = rhsSize; + const Index depth = cols; // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type // if that's not the case then vectorization is discarded, see below. - Index alignedStart = ei_first_aligned(rhs, size); - Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; + Index alignedStart = ei_first_aligned(rhs, depth); + Index alignedSize = PacketSize>1 ? alignedStart + ((depth-alignedStart) & ~PacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; Index alignmentPattern = alignmentStep==0 ? AllAligned - : alignmentStep==(PacketSize/2) ? EvenAligned - : FirstAligned; + : alignmentStep==(PacketSize/2) ? EvenAligned + : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); + const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth); // find how many rows do we have to skip to be aligned with rhs (if possible) Index skipRows = 0; @@ -318,7 +320,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( } else if (PacketSize>1) { - ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); + ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || depth<PacketSize); while (skipRows<PacketSize && alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize)) @@ -331,26 +333,26 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( } else { - skipRows = std::min(skipRows,Index(res.size())); + skipRows = std::min(skipRows,Index(rows)); // note that the skiped columns are processed later. } ei_internal_assert( alignmentPattern==NoneAligned || PacketSize==1 - || (skipRows + rowsAtOnce >= res.size()) - || PacketSize > rhsSize + || (skipRows + rowsAtOnce >= rows) + || PacketSize > depth || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0); } else if(Vectorizable) { alignedStart = 0; - alignedSize = size; + alignedSize = depth; alignmentPattern = AllAligned; } Index offset1 = (FirstAligned && alignmentStep==1?3:1); Index offset3 = (FirstAligned && alignmentStep==1?1:3); - Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) { EIGEN_ALIGN16 Scalar tmp0 = Scalar(0); @@ -439,17 +441,20 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( // process remaining coeffs (or all if no explicit vectorization) // FIXME this loop get vectorized by the compiler ! - for (Index j=alignedSize; j<size; ++j) + for (Index j=alignedSize; j<depth; ++j) { Scalar b = rhs[j]; tmp0 += cj.pmul(lhs0[j],b); tmp1 += cj.pmul(lhs1[j],b); tmp2 += cj.pmul(lhs2[j],b); tmp3 += cj.pmul(lhs3[j],b); } - res[i] += alpha*tmp0; res[i+offset1] += alpha*tmp1; res[i+2] += alpha*tmp2; res[i+offset3] += alpha*tmp3; + res[i*resIncr] += alpha*tmp0; + res[(i+offset1)*resIncr] += alpha*tmp1; + res[(i+2)*resIncr] += alpha*tmp2; + res[(i+offset3)*resIncr] += alpha*tmp3; } // process remaining first and last rows (at most columnsAtOnce-1) - Index end = res.size(); + Index end = rows; Index start = rowBound; do { @@ -477,9 +482,9 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( // process remaining scalars // FIXME this loop get vectorized by the compiler ! - for (Index j=alignedSize; j<size; ++j) + for (Index j=alignedSize; j<depth; ++j) tmp0 += cj.pmul(lhs0[j], rhs[j]); - res[i] += alpha*tmp0; + res[i*resIncr] += alpha*tmp0; } if (skipRows) { diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index abf8097d0..de4f0b7bb 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -119,11 +119,11 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co if (r>0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - Block<Result,Dynamic,1> target(res,pi,0,actualPanelWidth,1); - ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs>( + ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs,Scalar,Index>( + actualPanelWidth, r, &(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(), - &(rhs.const_cast_derived().coeffRef(s)), r, - target, alpha); + &(rhs.const_cast_derived().coeffRef(s)), 1, + &res.coeffRef(pi,0), res.innerStride(), alpha); } } } diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 1b7d03722..62a6207c6 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -49,9 +49,10 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, static void ei_cache_friendly_product_colmajor_times_vector( Index size, const Scalar* lhs, Index lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha); -template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index, typename ResType> +template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index> static void ei_cache_friendly_product_rowmajor_times_vector( - const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha); + Index rows, Index Cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha); template<typename Scalar> struct ei_conj_helper<Scalar,Scalar,false,false> { @@ -107,6 +108,17 @@ template<typename RealScalar> struct ei_conj_helper<RealScalar, std::complex<Rea { return x * y; } }; +template<bool Conjugate> struct ei_conj_if; + +template<> struct ei_conj_if<true> { + template<typename T> + inline T operator()(const T& x) { return ei_conj(x); } +}; + +template<> struct ei_conj_if<false> { + template<typename T> + inline const T& operator()(const T& x) { return x; } +}; // Lightweight helper class to access matrix coefficients. // Yes, this is somehow redundant with Map<>, but this version is much much lighter, diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 9d73ef51c..b01ceafb2 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -222,16 +222,4 @@ template<typename T> struct ei_is_diagonal<DiagonalWrapper<T> > template<typename T, int S> struct ei_is_diagonal<DiagonalMatrix<T,S> > { enum { ret = true }; }; -template<bool Conjugate> struct ei_conj_if; - -template<> struct ei_conj_if<true> { - template<typename T> - inline T operator()(const T& x) { return ei_conj(x); } -}; - -template<> struct ei_conj_if<false> { - template<typename T> - inline const T& operator()(const T& x) { return x; } -}; - #endif // EIGEN_META_H |