* merge with mainline

* adapt Eigenvalues module to the new rule that the RowMajorBit must have the proper value for vectors
* Fix RowMajorBit in ei_traits<ProductBase>
* Fix vectorizability logic in CoeffBasedProduct
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-04-16 11:25:50 -0400
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-04-16 11:25:50 -0400
commit: 0ab431d7b860afc6766c7c20f7bb39a1d71bff62 (patch)
tree: f8da6ce3cc7738735f315f7954bbbabf48e0c621 /Eigen/src/Core
parent: ff6a46105d86e92753858c1b2aea8bcaf4575819 (diff)
parent: ea1a2df37092f88f5594dfea1f7e4996dd8e612d (diff)
7 files changed, 55 insertions, 16 deletions
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 6459cd1b1..566b4b410 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -528,7 +528,7 @@ template<typename Derived> class DenseBase
     #endif
 
     // disable the use of evalTo for dense objects with a nice compilation error
-    template<typename Dest> inline void evalTo(Dest& dst) const
+    template<typename Dest> inline void evalTo(Dest& ) const
     {
       EIGEN_STATIC_ASSERT((ei_is_same_type<Dest,void>::ret),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
     }
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index c2b317cc0..d02633cb8 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -274,7 +274,7 @@ template<typename Scalar, typename NewType>
 struct ei_scalar_cast_op {
   EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_cast_op)
   typedef NewType result_type;
-  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return static_cast<NewType>(a); }
+  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return ei_cast<Scalar, NewType>(a); }
 };
 template<typename Scalar, typename NewType>
 struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h
index 3e8d2bc66..c98742246 100644
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -126,6 +126,16 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
   return WithFormat<Derived>(derived(), fmt);
 }
 
+template<typename Scalar>
+struct ei_significant_decimals_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline int run()
+  {
+    return ei_cast<RealScalar,int>(std::ceil(-ei_log(NumTraits<RealScalar>::epsilon())/ei_log(RealScalar(10))));
+  }
+};
+
 /** \internal
   * print the matrix \a _m to the output stream \a s using the output format \a fmt */
 template<typename Derived>
@@ -145,9 +155,7 @@ std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOForm
   {
     if (NumTraits<Scalar>::HasFloatingPoint)
     {
-      typedef typename NumTraits<Scalar>::Real RealScalar;
-      RealScalar explicit_precision_fp = std::ceil(-ei_log(NumTraits<Scalar>::epsilon())/ei_log(10.0));
-      explicit_precision = static_cast<std::streamsize>(explicit_precision_fp);
+      explicit_precision = ei_significant_decimals_impl<Scalar>::run();
     }
     else
     {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index c97a68e50..4a21ec975 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -44,6 +44,23 @@ template<typename T> inline typename NumTraits<T>::Real ei_hypot(T x, T y)
   return p * ei_sqrt(T(1) + qp*qp);
 }
 
+// the point of wrapping these casts in this helper template struct is to allow users to specialize it to custom types
+// that may not have the needed conversion operators (especially as c++98 doesn't have explicit conversion operators).
+
+template<typename OldType, typename NewType> struct ei_cast_impl
+{
+  static inline NewType run(const OldType& x)
+  {
+    return static_cast<NewType>(x);
+  }
+};
+
+template<typename OldType, typename NewType> inline NewType ei_cast(const OldType& x)
+{
+  return ei_cast_impl<OldType, NewType>::run(x);
+}
+
+
 /**************
 ***   int   ***
 **************/
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 4013f6ab1..b7c4ac11d 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -42,7 +42,7 @@ struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> > //: ei_traits<typename ei_clea
     ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime,
     MaxRowsAtCompileTime = ei_traits<Lhs>::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = ei_traits<Rhs>::MaxColsAtCompileTime,
-    Flags = (RowsAtCompileTime==1 ? RowMajorBit : 0)
+    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
           | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
                   // Note that EvalBeforeNestingBit and NestByRefBit
                   // are not used in practice because ei_nested is overloaded for products
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3c0020248..99662eb6d 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -369,10 +369,14 @@ static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x)
 // For detail see here: http://www.beyond3d.com/content/articles/8/
 static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x)
 {
-  Packet4f half = ei_pmul(_x, ei_pset1(.5f));
-  Packet4f x = _mm_rsqrt_ps(_x);
-  x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
-  return ei_pmul(_x,x);
+	Packet4f half = ei_pmul(_x, ei_pset1(.5f));
+	
+	/* keep the inverse sqrt estimate only where _x > epsilon; all other lanes (including zero) are masked to 0 */
+	Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon()));
+	Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
+
+	x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
+	return ei_pmul(_x,x);
 }
 
 #endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 17fbc9190..2f7b32c65 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -72,10 +72,18 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
       RhsRowMajor = RhsFlags & RowMajorBit,
 
       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
-                      && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0),
+                      && (ColsAtCompileTime == Dynamic
+                          || ( (ColsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+                              && (RhsFlags&AlignedBit)
+                             )
+                         ),
 
       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
-                      && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0),
+                      && (RowsAtCompileTime == Dynamic
+                          || ( (RowsAtCompileTime % ei_packet_traits<Scalar>::size) == 0
+                              && (LhsFlags&AlignedBit)
+                             )
+                         ),
 
       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -84,8 +92,7 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
             | (EvalToRowMajor ? RowMajorBit : 0)
             | NestingFlags
-            | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0)
-            | (LhsFlags & RhsFlags & AlignedBit),
+            | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
 
       CoeffReadCost = InnerSize == Dynamic ? Dynamic
                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
@@ -96,8 +103,11 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
       */
-      CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
-                        && (InnerSize % ei_packet_traits<Scalar>::size == 0)
+      CanVectorizeInner =    LhsRowMajor
+                          && (!RhsRowMajor)
+                          && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+                          && (LhsFlags & RhsFlags & AlignedBit)
+                          && (InnerSize % ei_packet_traits<Scalar>::size == 0)
     };
 };
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2010-04-16 11:25:50 -0400
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2010-04-16 11:25:50 -0400
commit	0ab431d7b860afc6766c7c20f7bb39a1d71bff62 (patch)
tree	f8da6ce3cc7738735f315f7954bbbabf48e0c621 /Eigen/src/Core
parent	ff6a46105d86e92753858c1b2aea8bcaf4575819 (diff)
parent	ea1a2df37092f88f5594dfea1f7e4996dd8e612d (diff)