about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-04-16 11:25:50 -0400
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-04-16 11:25:50 -0400
commit: 0ab431d7b860afc6766c7c20f7bb39a1d71bff62 (patch)
tree: f8da6ce3cc7738735f315f7954bbbabf48e0c621 /Eigen/src/Core
parent: ff6a46105d86e92753858c1b2aea8bcaf4575819 (diff)
parent: ea1a2df37092f88f5594dfea1f7e4996dd8e612d (diff)
* merge with mainline
* adapt Eigenvalues module to the new rule that the RowMajorBit must have the proper value for vectors
* Fix RowMajorBit in ei_traits<ProductBase>
* Fix vectorizability logic in CoeffBasedProduct
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/DenseBase.h | 2
-rw-r--r-- Eigen/src/Core/Functors.h | 2
-rw-r--r-- Eigen/src/Core/IO.h | 14
-rw-r--r-- Eigen/src/Core/MathFunctions.h | 17
-rw-r--r-- Eigen/src/Core/ProductBase.h | 2
-rw-r--r-- Eigen/src/Core/arch/SSE/MathFunctions.h | 12
-rw-r--r-- Eigen/src/Core/products/CoeffBasedProduct.h | 22
7 files changed, 55 insertions, 16 deletions
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 6459cd1b1..566b4b410 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -528,7 +528,7 @@ template<typename Derived> class DenseBase
#endif
// disable the use of evalTo for dense objects with a nice compilation error
- template<typename Dest> inline void evalTo(Dest& dst) const
+ template<typename Dest> inline void evalTo(Dest& ) const
{
EIGEN_STATIC_ASSERT((ei_is_same_type<Dest,void>::ret),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
}
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index c2b317cc0..d02633cb8 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -274,7 +274,7 @@ template<typename Scalar, typename NewType>
struct ei_scalar_cast_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_cast_op)
typedef NewType result_type;
- EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return static_cast<NewType>(a); }
+ EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return ei_cast<Scalar, NewType>(a); }
};
template<typename Scalar, typename NewType>
struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h
index 3e8d2bc66..c98742246 100644
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -126,6 +126,16 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
return WithFormat<Derived>(derived(), fmt);
}
+template<typename Scalar>
+struct ei_significant_decimals_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline int run()
+ {
+ return ei_cast<RealScalar,int>(std::ceil(-ei_log(NumTraits<RealScalar>::epsilon())/ei_log(RealScalar(10))));
+ }
+};
+
/** \internal
* print the matrix \a _m to the output stream \a s using the output format \a fmt */
template<typename Derived>
@@ -145,9 +155,7 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
{
if (NumTraits<Scalar>::HasFloatingPoint)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
- RealScalar explicit_precision_fp = std::ceil(-ei_log(NumTraits<Scalar>::epsilon())/ei_log(10.0));
- explicit_precision = static_cast<std::streamsize>(explicit_precision_fp);
+ explicit_precision = ei_significant_decimals_impl<Scalar>::run();
}
else
{
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index c97a68e50..4a21ec975 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -44,6 +44,23 @@ template<typename T> inline typename NumTraits<T>::Real ei_hypot(T x, T y)
return p * ei_sqrt(T(1) + qp*qp);
}
+// the point of wrapping these casts in this helper template struct is to allow users to specialize it to custom types
+// that may not have the needed conversion operators (especially as c++98 doesn't have explicit conversion operators).
+
+template<typename OldType, typename NewType> struct ei_cast_impl
+{
+ static inline NewType run(const OldType& x)
+ {
+ return static_cast<NewType>(x);
+ }
+};
+
+template<typename OldType, typename NewType> inline NewType ei_cast(const OldType& x)
+{
+ return ei_cast_impl<OldType, NewType>::run(x);
+}
+
+
/**************
*** int ***
**************/
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 4013f6ab1..b7c4ac11d 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -42,7 +42,7 @@ struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> > //: ei_traits<typename ei_clea
ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<Lhs>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<Rhs>::MaxColsAtCompileTime,
- Flags = (RowsAtCompileTime==1 ? RowMajorBit : 0)
+ Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
| EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
// Note that EvalBeforeNestingBit and NestByRefBit
// are not used in practice because ei_nested is overloaded for products
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3c0020248..99662eb6d 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -369,10 +369,14 @@ static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x)
// For detail see here: http://www.beyond3d.com/content/articles/8/
static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x)
{
- Packet4f half = ei_pmul(_x, ei_pset1(.5f));
- Packet4f x = _mm_rsqrt_ps(_x);
- x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
- return ei_pmul(_x,x);
+ Packet4f half = ei_pmul(_x, ei_pset1(.5f));
+
+ /* select only the inverse sqrt of non-zero inputs */
+ Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
+ Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
+
+ x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
+ return ei_pmul(_x,x);
}
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 17fbc9190..2f7b32c65 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -72,10 +72,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
RhsRowMajor = RhsFlags & RowMajorBit,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
- && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0),
+ && (ColsAtCompileTime == Dynamic
+ || ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+ && (RhsFlags&AlignedBit)
+ )
+ ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
- && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0),
+ && (RowsAtCompileTime == Dynamic
+ || ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+ && (LhsFlags&AlignedBit)
+ )
+ ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -84,8 +92,7 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| NestingFlags
- | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0)
- | (LhsFlags & RhsFlags & AlignedBit),
+ | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
CoeffReadCost = InnerSize == Dynamic ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
@@ -96,8 +103,11 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
*/
- CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
- && (InnerSize % ei_packet_traits<Scalar>::size == 0)
+ CanVectorizeInner = LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (LhsFlags & RhsFlags & AlignedBit)
+ && (InnerSize % ei_packet_traits<Scalar>::size == 0)
};
};