diff options
-rw-r--r-- | Eigen/src/Core/NumTraits.h | 17 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 3 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/functors/BinaryFunctors.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/functors/UnaryFunctors.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 14 | ||||
-rw-r--r-- | test/integer_types.cpp | 8 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 4 |
8 files changed, 39 insertions, 27 deletions
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 899dc672a..dd61195bc 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -97,23 +97,6 @@ template<typename T> struct GenericNumTraits MulCost = 1 }; - // Division is messy but important, because it is expensive and throughput - // varies significantly. The following numbers are based on min division - // throughput on Haswell. - template<bool Vectorized> - struct Div { - enum { -#ifdef EIGEN_VECTORIZE_AVX - AVX = true, -#else - AVX = false, -#endif - Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)): - Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8 - }; - }; - - typedef T Real; typedef typename internal::conditional< IsInteger, diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 7014a6889..dae0ca5d0 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits }; }; +template<> struct scalar_div_cost<float,true> { enum { value = 14 }; }; +template<> struct scalar_div_cost<double,true> { enum { value = 16 }; }; + /* Proper support for integers is only provided by AVX2. In the meantime, we'll use SSE instructions and packets to deal with integers. template<> struct packet_traits<int> : default_packet_traits diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 0057e2062..baad692e3 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +#ifndef EIGEN_VECTORIZE_AVX +template<> struct scalar_div_cost<float,true> { enum { value = 7 }; }; +template<> struct scalar_div_cost<double,true> { enum { value = 8 }; }; +#endif + #if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index dc3690444..d82ffed02 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -287,7 +287,7 @@ struct functor_traits<scalar_hypot_op<Scalar,Scalar> > { { Cost = 3 * NumTraits<Scalar>::AddCost + 2 * NumTraits<Scalar>::MulCost + - 2 * NumTraits<Scalar>::template Div<false>::Cost, + 2 * scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; @@ -375,7 +375,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > { typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type; enum { PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv, - Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost + Cost = scalar_div_cost<result_type,PacketAccess>::value }; }; diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 59b9edf69..2009f8e57 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (14 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost + - NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) + scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)) #else Cost = (sizeof(Scalar) == 4 @@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (23 * NumTraits<Scalar>::AddCost + 12 * NumTraits<Scalar>::MulCost + - NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) + scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)) #endif }; }; @@ -514,17 +514,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > { // 9 pmadd, 2 pmul, 1 div, 2 other ? (2 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost + - NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) + scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value) #else ? (11 * NumTraits<Scalar>::AddCost + 11 * NumTraits<Scalar>::MulCost + - NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) + scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value) #endif // This number assumes a naive implementation of tanh : (6 * NumTraits<Scalar>::AddCost + 3 * NumTraits<Scalar>::MulCost + - 2 * NumTraits<Scalar>::template Div< - packet_traits<Scalar>::HasDiv>::Cost + + 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value + functor_traits<scalar_exp_op<Scalar> >::Cost)) }; }; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index e26a92022..ebe70f193 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -664,6 +664,20 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces return false; } +// Internal helper defining the cost of a scalar division for the type T. +// The default heuristic can be specialized for each scalar type and architecture. +template<typename T,bool Vectorized=false,typename EnaleIf = void> +struct scalar_div_cost { + enum { value = 8*NumTraits<T>::MulCost }; +}; + + +template<bool Vectorized> +struct scalar_div_cost<signed long,Vectorized,typename enable_if<sizeof(long)==8>::type> { enum { value = 24 }; }; +template<bool Vectorized> +struct scalar_div_cost<unsigned long,Vectorized,typename enable_if<sizeof(long)==8>::type> { enum { value = 21 }; }; + + #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) { diff --git a/test/integer_types.cpp b/test/integer_types.cpp index 950f8e9be..a21f73a81 100644 --- a/test/integer_types.cpp +++ b/test/integer_types.cpp @@ -158,4 +158,12 @@ void test_integer_types() CALL_SUBTEST_8( integer_type_tests(Matrix<unsigned long long, Dynamic, 5>(1, 5)) ); } +#ifdef EIGEN_TEST_PART_9 + VERIFY_IS_EQUAL(internal::scalar_div_cost<int>::value, 8); + VERIFY_IS_EQUAL(internal::scalar_div_cost<unsigned int>::value, 8); + if(sizeof(long)>sizeof(int)) { + VERIFY(internal::scalar_div_cost<long>::value > internal::scalar_div_cost<int>::value); + VERIFY(internal::scalar_div_cost<unsigned long>::value > internal::scalar_div_cost<int>::value); + } +#endif } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3f623afa4..fc75dbb5c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,7 @@ struct scalar_mod_op { }; template <typename Scalar> struct functor_traits<scalar_mod_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; /** \internal @@ -38,7 +38,7 @@ struct scalar_mod2_op { }; template <typename Scalar> struct functor_traits<scalar_mod2_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; template <typename Scalar> struct scalar_fmod_op { |