aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/NumTraits.h17
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h3
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h5
-rw-r--r--Eigen/src/Core/functors/BinaryFunctors.h4
-rw-r--r--Eigen/src/Core/functors/UnaryFunctors.h11
-rw-r--r--Eigen/src/Core/util/XprHelper.h14
-rw-r--r--test/integer_types.cpp8
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h4
8 files changed, 39 insertions, 27 deletions
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 899dc672a..dd61195bc 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -97,23 +97,6 @@ template<typename T> struct GenericNumTraits
MulCost = 1
};
- // Division is messy but important, because it is expensive and throughput
- // varies significantly. The following numbers are based on min division
- // throughput on Haswell.
- template<bool Vectorized>
- struct Div {
- enum {
-#ifdef EIGEN_VECTORIZE_AVX
- AVX = true,
-#else
- AVX = false,
-#endif
- Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
- Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
- };
- };
-
-
typedef T Real;
typedef typename internal::conditional<
IsInteger,
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 7014a6889..dae0ca5d0 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 0057e2062..baad692e3 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4,
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index dc3690444..d82ffed02 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -287,7 +287,7 @@ struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
{
Cost = 3 * NumTraits<Scalar>::AddCost +
2 * NumTraits<Scalar>::MulCost +
- 2 * NumTraits<Scalar>::template Div<false>::Cost,
+ 2 * scalar_div_cost<Scalar,false>::value,
PacketAccess = false
};
};
@@ -375,7 +375,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum {
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
- Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
+ Cost = scalar_div_cost<result_type,PacketAccess>::value
};
};
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 59b9edf69..2009f8e57 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (14 * NumTraits<Scalar>::AddCost +
6 * NumTraits<Scalar>::MulCost +
- NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#else
Cost =
(sizeof(Scalar) == 4
@@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (23 * NumTraits<Scalar>::AddCost +
12 * NumTraits<Scalar>::MulCost +
- NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#endif
};
};
@@ -514,17 +514,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > {
// 9 pmadd, 2 pmul, 1 div, 2 other
? (2 * NumTraits<Scalar>::AddCost +
6 * NumTraits<Scalar>::MulCost +
- NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#else
? (11 * NumTraits<Scalar>::AddCost +
11 * NumTraits<Scalar>::MulCost +
- NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#endif
// This number assumes a naive implementation of tanh
: (6 * NumTraits<Scalar>::AddCost +
3 * NumTraits<Scalar>::MulCost +
- 2 * NumTraits<Scalar>::template Div<
- packet_traits<Scalar>::HasDiv>::Cost +
+ 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
functor_traits<scalar_exp_op<Scalar> >::Cost))
};
};
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index e26a92022..ebe70f193 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -664,6 +664,20 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
return false;
}
+// Internal helper defining the cost of a scalar division for the type T.
+// The default heuristic can be specialized for each scalar type and architecture.
+template<typename T,bool Vectorized=false,typename EnaleIf = void>
+struct scalar_div_cost {
+ enum { value = 8*NumTraits<T>::MulCost };
+};
+
+
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename enable_if<sizeof(long)==8>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename enable_if<sizeof(long)==8>::type> { enum { value = 21 }; };
+
+
#ifdef EIGEN_DEBUG_ASSIGN
std::string demangle_traversal(int t)
{
diff --git a/test/integer_types.cpp b/test/integer_types.cpp
index 950f8e9be..a21f73a81 100644
--- a/test/integer_types.cpp
+++ b/test/integer_types.cpp
@@ -158,4 +158,12 @@ void test_integer_types()
CALL_SUBTEST_8( integer_type_tests(Matrix<unsigned long long, Dynamic, 5>(1, 5)) );
}
+#ifdef EIGEN_TEST_PART_9
+ VERIFY_IS_EQUAL(internal::scalar_div_cost<int>::value, 8);
+ VERIFY_IS_EQUAL(internal::scalar_div_cost<unsigned int>::value, 8);
+ if(sizeof(long)>sizeof(int)) {
+ VERIFY(internal::scalar_div_cost<long>::value > internal::scalar_div_cost<int>::value);
+ VERIFY(internal::scalar_div_cost<unsigned long>::value > internal::scalar_div_cost<int>::value);
+ }
+#endif
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 3f623afa4..fc75dbb5c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,7 +25,7 @@ struct scalar_mod_op {
};
template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
/** \internal
@@ -38,7 +38,7 @@ struct scalar_mod2_op {
};
template <typename Scalar>
struct functor_traits<scalar_mod2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
template <typename Scalar>
struct scalar_fmod_op {