author    Benoit Steiner <benoit.steiner.goog@gmail.com>  2015-10-29 17:57:48 -0700
committer Benoit Steiner <benoit.steiner.goog@gmail.com>  2015-10-29 17:57:48 -0700
commit    ca12d4c3b3d4ffa78b325fe8082ab372f59106ad (patch)
tree      2b109b7596357494f83b10003157341203d5d83c /Eigen/src/Core/util
parent    31bdafac67268ace9c4eeda4a225379609ce8b99 (diff)
parent    c444a0a8c3925ed07dc639259d616e771b28aef0 (diff)
Pulled latest updates from trunk
Diffstat (limited to 'Eigen/src/Core/util')
-rw-r--r--  Eigen/src/Core/util/Constants.h     |  8
-rw-r--r--  Eigen/src/Core/util/Meta.h          | 10
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h  |  7
-rw-r--r--  Eigen/src/Core/util/XprHelper.h     | 56
4 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index ddb1cf6f4..c35077af6 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -30,6 +30,14 @@ const int DynamicIndex = 0xffffff;
*/
const int Infinity = -1;
+/** This value means that the cost to evaluate an expression coefficient is either very expensive or
+ * cannot be known at compile time.
+ *
+ * This value has to be positive to (1) simplify cost computation, and (2) allow distinguishing a very expensive expression from a very, very expensive one.
+ * It must therefore be large enough to ensure that unrolling won't happen and that sub-expressions will be evaluated, yet small enough to avoid integer overflow.
+ */
+const int HugeCost = 10000;
+
/** \defgroup flags Flags
* \ingroup Core_Module
*
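As a side note on the new constant: HugeCost is sized so that it can be squared without overflowing a signed int (10000^2 = 10^8 < INT_MAX). A minimal standalone sketch (not part of this patch) of that invariant:

#include <climits>

const int HugeCost = 10000;

// HugeCost must stay positive and small enough that the cost heuristics
// can multiply two such costs without overflowing int.
static_assert(HugeCost > 0, "costs must be positive");
static_assert(HugeCost <= INT_MAX / HugeCost, "HugeCost*HugeCost overflows int");

int main() { return 0; }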
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 6eb409194..ef35cefb4 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -11,6 +11,10 @@
#ifndef EIGEN_META_H
#define EIGEN_META_H
+#if defined(__CUDA_ARCH__)
+#include <cfloat>
+#endif
+
namespace Eigen {
namespace internal {
@@ -138,16 +142,16 @@ template<> struct numeric_limits<float>
EIGEN_DEVICE_FUNC
static float (max)() { return CUDART_MAX_NORMAL_F; }
EIGEN_DEVICE_FUNC
- static float (min)() { return __FLT_EPSILON__; }
+ static float (min)() { return FLT_MIN; }
};
template<> struct numeric_limits<double>
{
EIGEN_DEVICE_FUNC
static double epsilon() { return __DBL_EPSILON__; }
EIGEN_DEVICE_FUNC
- static double (max)() { return CUDART_INF; }
+ static double (max)() { return DBL_MAX; }
EIGEN_DEVICE_FUNC
- static double (min)() { return __DBL_EPSILON__; }
+ static double (min)() { return DBL_MIN; }
};
template<> struct numeric_limits<int>
{
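The replacements above matter because __FLT_EPSILON__ is the machine epsilon, not the smallest positive normalized value that (min)() is supposed to return, and CUDART_INF is not a finite maximum. A host-side sketch (not part of this patch) showing how far apart these quantities are:

#include <cfloat>
#include <cstdio>

int main() {
  // FLT_MIN is the smallest positive normalized float (~1.18e-38), while
  // FLT_EPSILON is the gap between 1.0f and the next float (~1.19e-7).
  std::printf("FLT_MIN     = %g\n", (double)FLT_MIN);
  std::printf("FLT_EPSILON = %g\n", (double)FLT_EPSILON);
  // DBL_MAX is the largest finite double, unlike an infinity constant.
  std::printf("DBL_MAX     = %g\n", DBL_MAX);
  return 0;
}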
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 7538a0633..9d7302d81 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -93,7 +93,8 @@
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
- STORAGE_LAYOUT_DOES_NOT_MATCH
+ STORAGE_LAYOUT_DOES_NOT_MATCH,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE
};
};
@@ -200,5 +201,9 @@
>::value), \
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+// Check that a cost value is positive, and that it stays within a reasonable range
+// TODO this check could be enabled for internal debugging only
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+ EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
#endif // EIGEN_STATIC_ASSERT_H
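For illustration, a standalone approximation (simplified stand-ins, not Eigen's actual macro) of the check this hunk introduces:

const int HugeCost = 10000;

// Mirrors EIGEN_INTERNAL_CHECK_COST_VALUE: a cost must be non-negative
// and must not exceed HugeCost squared.
#define CHECK_COST_VALUE(C) \
  static_assert((C) >= 0 && (C) <= HugeCost * HugeCost, "invalid cost value")

enum { SomeFunctorCost = 3 };      // hypothetical cost value
CHECK_COST_VALUE(SomeFunctorCost); // compiles
// CHECK_COST_VALUE(-1);           // would fail the assertion

int main() { return 0; }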
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 624d8a83b..f9e2959cc 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -397,28 +397,20 @@ struct transfer_constness
template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
{
enum {
- // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values.
- // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values.
- // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues
- // (poor choice of temporaries).
- // It's important that this value can still be squared without integer overflowing.
- DynamicAsInteger = 10000,
ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
- ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluates itself into a temporary?
// Then CoeffReadCost will be small (e.g., 1), but we still have to evaluate it, especially if n>1.
// This situation is already taken care of by the EvalBeforeNestingBit flag, which is turned ON
// for all evaluators creating a temporary. This flag is then propagated by the parent evaluators.
// Another solution could be to count the number of temporaries?
- CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
- NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
- CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
- CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger
+ NAsInteger = n == Dynamic ? HugeCost : n,
+ CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+ CostNoEval = NAsInteger * CoeffReadCost
};
typedef typename conditional<
( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
- (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ),
+ (int(CostEval) < int(CostNoEval)) ),
PlainObject,
typename ref_selector<T>::type
>::type type;
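The simplified cost model above can be paraphrased in a standalone sketch (illustrative, but using the same formulas as the enum): evaluating into a temporary pays ScalarReadCost per read plus one evaluation pass, while nesting by reference pays CoeffReadCost on each of the n reads.

#include <cstdio>

const int HugeCost = 10000;
const int Dynamic  = -1;

bool should_evaluate(int n, int ScalarReadCost, int CoeffReadCost) {
  int N          = (n == Dynamic) ? HugeCost : n;
  int CostEval   = (N + 1) * ScalarReadCost + CoeffReadCost;
  int CostNoEval = N * CoeffReadCost;
  return CostEval < CostNoEval;
}

int main() {
  // An expression read 3 times whose coefficients cost 10 each:
  std::printf("%d\n", should_evaluate(3, 1, 10)); // 1: evaluate once
  std::printf("%d\n", should_evaluate(1, 1, 1));  // 0: nest by reference
  return 0;
}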
@@ -460,9 +452,9 @@ struct generic_xpr_base<Derived, XprKind, Dense>
/** \internal Helper base class to add a scalar multiple operator
* overloads for complex types */
-template<typename Derived,typename Scalar,typename OtherScalar,
+template<typename Derived, typename Scalar, typename OtherScalar, typename BaseType,
bool EnableIt = !is_same<Scalar,OtherScalar>::value >
-struct special_scalar_op_base : public DenseCoeffsBase<Derived>
+struct special_scalar_op_base : public BaseType
{
// dummy operator* so that the
// "using special_scalar_op_base::operator*" compiles
@@ -471,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
void operator/(dummy) const;
};
-template<typename Derived,typename Scalar,typename OtherScalar>
-struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCoeffsBase<Derived>
+template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
+struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType
{
const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
operator*(const OtherScalar& scalar) const
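The change to special_scalar_op_base follows a general mixin pattern: the base class becomes a template parameter, so the same operator overloads can be spliced into different hierarchies instead of being pinned to DenseCoeffsBase. A generic sketch with illustrative names (not Eigen's API):

#include <iostream>

struct DenseBaseA { int id() const { return 1; } };
struct DenseBaseB { int id() const { return 2; } };

// The mixin injects extra members while inheriting from whichever base
// the instantiation names, instead of a hard-coded one.
template <typename Derived, typename BaseType>
struct scalar_op_mixin : public BaseType {
  const Derived& derived() const { return static_cast<const Derived&>(*this); }
};

struct ExprA : scalar_op_mixin<ExprA, DenseBaseA> {};
struct ExprB : scalar_op_mixin<ExprB, DenseBaseB> {};

int main() {
  std::cout << ExprA().id() << ExprB().id() << std::endl; // prints 12
  return 0;
}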
@@ -670,6 +662,38 @@ template<typename T> struct is_same_or_void<void,T> { enum { value = 1 }; };
template<typename T> struct is_same_or_void<T,void> { enum { value = 1 }; };
template<> struct is_same_or_void<void,void> { enum { value = 1 }; };
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+ if(t==DefaultTraversal) return "DefaultTraversal";
+ if(t==LinearTraversal) return "LinearTraversal";
+ if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+ if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+ if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+ return "?";
+}
+std::string demangle_unrolling(int t)
+{
+ if(t==NoUnrolling) return "NoUnrolling";
+ if(t==InnerUnrolling) return "InnerUnrolling";
+ if(t==CompleteUnrolling) return "CompleteUnrolling";
+ return "?";
+}
+std::string demangle_flags(int f)
+{
+ std::string res;
+ if(f&RowMajorBit) res += " | RowMajor";
+ if(f&PacketAccessBit) res += " | Packet";
+ if(f&LinearAccessBit) res += " | Linear";
+ if(f&LvalueBit) res += " | Lvalue";
+ if(f&DirectAccessBit) res += " | Direct";
+ if(f&NestByRefBit) res += " | NestByRef";
+ if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
+
+ return res;
+}
+#endif
+
} // end namespace internal
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
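When EIGEN_DEBUG_ASSIGN is defined before including Eigen, the demangle helpers added above can pretty-print evaluator properties. A minimal sketch (the printed flags depend on the matrix type and on the Eigen version):

#define EIGEN_DEBUG_ASSIGN
#include <Eigen/Core>
#include <iostream>

int main() {
  // Render the flags of a fixed-size matrix evaluator as text.
  std::cout << Eigen::internal::demangle_flags(
                   Eigen::internal::evaluator<Eigen::Matrix4f>::Flags)
            << std::endl;
  // Possible output: " | Packet | Linear | Lvalue | Direct"
  return 0;
}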