diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2010-04-16 11:25:50 -0400 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2010-04-16 11:25:50 -0400 |
commit | 0ab431d7b860afc6766c7c20f7bb39a1d71bff62 (patch) | |
tree | f8da6ce3cc7738735f315f7954bbbabf48e0c621 /Eigen/src/Core | |
parent | ff6a46105d86e92753858c1b2aea8bcaf4575819 (diff) | |
parent | ea1a2df37092f88f5594dfea1f7e4996dd8e612d (diff) |
* merge with mainline
* adapt Eigenvalues module to the new rule that the RowMajorBit must have the proper value for vectors
* Fix RowMajorBit in ei_traits<ProductBase>
* Fix vectorizability logic in CoeffBasedProduct
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Functors.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/IO.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 17 | ||||
-rw-r--r-- | Eigen/src/Core/ProductBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/products/CoeffBasedProduct.h | 22 |
7 files changed, 55 insertions, 16 deletions
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 6459cd1b1..566b4b410 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -528,7 +528,7 @@ template<typename Derived> class DenseBase #endif // disable the use of evalTo for dense objects with a nice compilation error - template<typename Dest> inline void evalTo(Dest& dst) const + template<typename Dest> inline void evalTo(Dest& ) const { EIGEN_STATIC_ASSERT((ei_is_same_type<Dest,void>::ret),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); } diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index c2b317cc0..d02633cb8 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -274,7 +274,7 @@ template<typename Scalar, typename NewType> struct ei_scalar_cast_op { EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_cast_op) typedef NewType result_type; - EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return static_cast<NewType>(a); } + EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return ei_cast<Scalar, NewType>(a); } }; template<typename Scalar, typename NewType> struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> > diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index 3e8d2bc66..c98742246 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -126,6 +126,16 @@ DenseBase<Derived>::format(const IOFormat& fmt) const return WithFormat<Derived>(derived(), fmt); } +template<typename Scalar> +struct ei_significant_decimals_impl +{ + typedef typename NumTraits<Scalar>::Real RealScalar; + static inline int run() + { + return ei_cast<RealScalar,int>(std::ceil(-ei_log(NumTraits<RealScalar>::epsilon())/ei_log(RealScalar(10)))); + } +}; + /** \internal * print the matrix \a _m to the output stream \a s using the output format \a fmt */ template<typename Derived> @@ -145,9 +155,7 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm { if (NumTraits<Scalar>::HasFloatingPoint) { - typedef typename NumTraits<Scalar>::Real RealScalar; - RealScalar explicit_precision_fp = std::ceil(-ei_log(NumTraits<Scalar>::epsilon())/ei_log(10.0)); - explicit_precision = static_cast<std::streamsize>(explicit_precision_fp); + explicit_precision = ei_significant_decimals_impl<Scalar>::run(); } else { diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index c97a68e50..4a21ec975 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -44,6 +44,23 @@ template<typename T> inline typename NumTraits<T>::Real ei_hypot(T x, T y) return p * ei_sqrt(T(1) + qp*qp); } +// the point of wrapping these casts in this helper template struct is to allow users to specialize it to custom types +// that may not have the needed conversion operators (especially as c++98 doesn't have explicit conversion operators). + +template<typename OldType, typename NewType> struct ei_cast_impl +{ + static inline NewType run(const OldType& x) + { + return static_cast<NewType>(x); + } +}; + +template<typename OldType, typename NewType> inline NewType ei_cast(const OldType& x) +{ + return ei_cast_impl<OldType, NewType>::run(x); +} + + /************** *** int *** **************/ diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 4013f6ab1..b7c4ac11d 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -42,7 +42,7 @@ struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> > //: ei_traits<typename ei_clea ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime, MaxRowsAtCompileTime = ei_traits<Lhs>::MaxRowsAtCompileTime, MaxColsAtCompileTime = ei_traits<Rhs>::MaxColsAtCompileTime, - Flags = (RowsAtCompileTime==1 ? RowMajorBit : 0) + Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, // Note that EvalBeforeNestingBit and NestByRefBit // are not used in practice because ei_nested is overloaded for products diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3c0020248..99662eb6d 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -369,10 +369,14 @@ static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x) // For detail see here: http://www.beyond3d.com/content/articles/8/ static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x) { - Packet4f half = ei_pmul(_x, ei_pset1(.5f)); - Packet4f x = _mm_rsqrt_ps(_x); - x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); - return ei_pmul(_x,x); + Packet4f half = ei_pmul(_x, ei_pset1(.5f)); + + /* select only the inverse sqrt of non-zero inputs */ + Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon())); + Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x)); + + x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); + return ei_pmul(_x,x); } #endif // EIGEN_MATH_FUNCTIONS_SSE_H diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 17fbc9190..2f7b32c65 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -72,10 +72,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > RhsRowMajor = RhsFlags & RowMajorBit, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0), + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0), + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -84,8 +92,7 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) | NestingFlags - | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) - | (LhsFlags & RhsFlags & AlignedBit), + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0), CoeffReadCost = InnerSize == Dynamic ? Dynamic : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) @@ -96,8 +103,11 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ - CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (InnerSize % ei_packet_traits<Scalar>::size == 0) + CanVectorizeInner = LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % ei_packet_traits<Scalar>::size == 0) }; }; |