From b2053990d078e15017610d526445ce78f2632edf Mon Sep 17 00:00:00 2001 From: Robert Lukierski Date: Wed, 14 Mar 2018 16:19:43 +0000 Subject: Adding EIGEN_DEVICE_FUNC to Products, especially Dense2Dense Assignment specializations. Otherwise causes problems with small fixed size matrix multiplication (call to 0x00 in call_assignment_no_alias in debug mode or trap in release with CUDA 9.1). --- Eigen/src/Core/ProductEvaluators.h | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index fb637191d..5d2737d01 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -137,7 +137,7 @@ struct Assignment, internal::assign_op::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); @@ -155,7 +155,7 @@ struct Assignment, internal::add_assign_op< typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -170,7 +170,7 @@ struct Assignment, internal::sub_assign_op< typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -190,7 +190,7 @@ struct Assignment, const CwiseNullaryOp,Plain>, const Product > SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); @@ -217,7 +217,7 @@ template - static EIGEN_STRONG_INLINE + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { call_assignment_no_alias(dst, src.lhs(), Func1()); @@ -246,19 +246,19 @@ template struct generic_product_impl { template - static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; @@ -269,7 +269,7 @@ struct generic_product_impl // Column major result template -void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) { evaluator rhsEval(rhs); typename nested_eval::type actual_lhs(lhs); @@ -282,7 +282,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const // Row major result template -void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { evaluator lhsEval(lhs); typename nested_eval::type actual_rhs(rhs); @@ -300,37 +300,37 @@ struct generic_product_impl typedef typename Product::Scalar Scalar; // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose - struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct set { template EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; struct adds { Scalar m_scale; explicit adds(const Scalar& s) : m_scale(s) {} - template void operator()(const Dst& dst, const Src& src) const { + template void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += m_scale * src; } }; template - static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); } template - static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); } template - static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); } template - static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); } @@ -345,15 +345,15 @@ struct generic_product_impl_base typedef typename Product::Scalar Scalar; template - static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } template - static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } template - static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template @@ -373,7 +373,7 @@ struct generic_product_impl typedef typename internal::remove_all::type>::type MatrixType; template - static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { LhsNested actual_lhs(lhs); RhsNested actual_rhs(rhs); @@ -390,7 +390,7 @@ struct generic_product_impl typedef typename Product::Scalar Scalar; template - static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side @@ -398,14 +398,14 @@ struct generic_product_impl } template - static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += lhs.lazyProduct(rhs); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template - static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); -- cgit v1.2.3 From 7134fa7a2eea469f35ea12899e693a633b5b42e5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 7 Jun 2018 09:33:10 +0200 Subject: Fix compilation with MSVC by reverting to char* for _mm_prefetch except for PGI (the later being the one that has the wrong prototype). --- Eigen/src/Core/arch/AVX/PacketMath.h | 6 +++--- Eigen/src/Core/arch/AVX512/PacketMath.h | 6 +++--- Eigen/src/Core/arch/SSE/Complex.h | 4 ++-- Eigen/src/Core/arch/SSE/PacketMath.h | 12 +++++++++--- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 59aebd912..774e64981 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -318,9 +318,9 @@ template<> EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) } #ifndef EIGEN_VECTORIZE_AVX512 -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif template<> EIGEN_STRONG_INLINE float pfirst(const Packet8f& a) { diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index eb5de43d4..4e2e916de 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -604,9 +604,9 @@ EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) { pstore(to, pa); } -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> EIGEN_STRONG_INLINE float pfirst(const Packet16f& a) { diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index c618cfaaa..d075043ce 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -324,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index d1a7c65be..c944d2c0e 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -461,10 +461,16 @@ template<> EIGEN_STRONG_INLINE void pstore1(double* to, const double& pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); } +#if EIGEN_COMP_PGI +typedef const void * SsePrefetchPtrType; +#else +typedef const char * SsePrefetchPtrType; +#endif + #ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 -- cgit v1.2.3 From 405859f18dac56f324e1d93ca8721d5f7fd22c62 Mon Sep 17 00:00:00 2001 From: Mark D Ryan Date: Thu, 17 May 2018 17:04:00 +0100 Subject: Set EIGEN_IDEAL_MAX_ALIGN_BYTES correctly for AVX512 builds bug #1548 The macro EIGEN_IDEAL_MAX_ALIGN_BYTES is being incorrectly set to 32 on AVX512 builds. It should be set to 64. In the current code it is only set to 64 if the macro EIGEN_VECTORIZE_AVX512 is defined. This macro does get defined in AVX512 builds in Core, but only after Macros.h, the file that defines EIGEN_IDEAL_MAX_ALIGN_BYTES, has been included. This commit fixes the issue by setting EIGEN_IDEAL_MAX_ALIGN_BYTES to 64 if __AVX512F__ is defined. --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 9c68ecb7d..729fb3b1d 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -702,7 +702,7 @@ namespace Eigen { // If the user explicitly disable vectorization, then we also disable alignment #if defined(EIGEN_DONT_VECTORIZE) #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 -#elif defined(EIGEN_VECTORIZE_AVX512) +#elif defined(__AVX512F__) // 64 bytes static alignment is preferred only if really required #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 #elif defined(__AVX__) -- cgit v1.2.3 From 4bd158fa37b4bba74e6421575d5c69eeea547172 Mon Sep 17 00:00:00 2001 From: Michael Figurnov Date: Wed, 6 Jun 2018 18:49:26 +0100 Subject: Derivative of the incomplete Gamma function and the sample of a Gamma random variable. In addition to igamma(a, x), this code implements: * igamma_der_a(a, x) = d igamma(a, x) / da -- derivative of igamma with respect to the parameter * gamma_sample_der_alpha(alpha, sample) -- reparameterization derivative of a Gamma(alpha, 1) random variable sample with respect to the alpha parameter The derivatives are computed by forward mode differentiation of the igamma(a, x) code. Although gamma_sample_der_alpha can be implemented via igamma_der_a, a separate function is more accurate and efficient due to analytical cancellation of some terms. All three functions are implemented by a method parameterized with "mode" that always computes the derivatives, but does not return them unless required by the mode. The compiler is expected to (and, based on benchmarks, does) skip the unnecessary computations depending on the mode. --- Eigen/src/Core/GenericPacketMath.h | 2 + Eigen/src/Core/arch/CUDA/PacketMath.h | 4 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 14 + unsupported/Eigen/SpecialFunctions | 2 + .../SpecialFunctions/SpecialFunctionsArrayAPI.h | 42 ++ .../SpecialFunctions/SpecialFunctionsFunctors.h | 54 ++ .../src/SpecialFunctions/SpecialFunctionsHalf.h | 8 + .../src/SpecialFunctions/SpecialFunctionsImpl.h | 561 +++++++++++++-------- .../SpecialFunctions/SpecialFunctionsPacketMath.h | 15 + .../arch/CUDA/CudaSpecialFunctions.h | 35 ++ unsupported/test/cxx11_tensor_cuda.cu | 157 ++++++ unsupported/test/special_functions.cpp | 92 ++++ 12 files changed, 785 insertions(+), 201 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 888a3f7ea..55b6a89e2 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -85,6 +85,8 @@ struct default_packet_traits HasI0e = 0, HasI1e = 0, HasIGamma = 0, + HasIGammaDerA = 0, + HasGammaSampleDerAlpha = 0, HasIGammac = 0, HasBetaInc = 0, diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 704a4e0d9..ab8e477f4 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -47,6 +47,8 @@ template<> struct packet_traits : default_packet_traits HasI0e = 1, HasI1e = 1, HasIGamma = 1, + HasIGammaDerA = 1, + HasGammaSampleDerAlpha = 1, HasIGammac = 1, HasBetaInc = 1, @@ -78,6 +80,8 @@ template<> struct packet_traits : default_packet_traits HasI0e = 1, HasI1e = 1, HasIGamma = 1, + HasIGammaDerA = 1, + HasGammaSampleDerAlpha = 1, HasIGammac = 1, HasBetaInc = 1, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index a942c98dd..88b0af0d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -152,6 +152,20 @@ class TensorBase return binaryExpr(other.derived(), internal::scalar_igamma_op()); } + // igamma_der_a(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igamma_der_a(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igamma_der_a_op()); + } + + // gamma_sample_der_alpha(alpha = this, sample = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + gamma_sample_der_alpha(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_gamma_sample_der_alpha_op()); + } + // igammac(a = this, x = other) template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index 482ec6e6f..9441ba8f5 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -29,6 +29,8 @@ namespace Eigen { * - erfc * - lgamma * - igamma + * - igamma_der_a + * - gamma_sample_der_alpha * - igammac * - digamma * - polygamma diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h index b7a9d035b..30cdf4751 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h @@ -33,6 +33,48 @@ igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase +inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +igamma_der_a(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived()); +} + +/** \cpp11 \returns an expression of the coefficient-wise gamma_sample_der_alpha(\a alpha, \a sample) to the given arrays. + * + * This function computes the coefficient-wise derivative of the sample + * of a Gamma(alpha, 1) random variable with respect to the parameter alpha. + * + * \note This function supports only float and double scalar types in c++11 + * mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations + * of gamma_sample_der_alpha(T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::igamma(), Eigen::lgamma() + */ +template +inline const Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived> +gamma_sample_der_alpha(const Eigen::ArrayBase& alpha, const Eigen::ArrayBase& sample) { + return Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived>( + alpha.derived(), + sample.derived()); +} + /** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. * * This function computes the coefficient-wise complementary incomplete gamma function. diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index 8420f0174..3a63dcdd6 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -41,6 +41,60 @@ struct functor_traits > { }; }; +/** \internal + * \brief Template functor to compute the derivative of the incomplete gamma + * function igamma_der_a(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma_der_a + */ +template +struct scalar_igamma_der_a_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_der_a_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& x) const { + using numext::igamma_der_a; + return igamma_der_a(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigamma_der_a(a, x); + } +}; +template +struct functor_traits > { + enum { + // 2x the cost of igamma + Cost = 40 * NumTraits::MulCost + 20 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGammaDerA + }; +}; + +/** \internal + * \brief Template functor to compute the derivative of the sample + * of a Gamma(alpha, 1) random variable with respect to the parameter alpha + * gamma_sample_der_alpha(alpha, sample) + * + * \sa class CwiseBinaryOp, Cwise::gamma_sample_der_alpha + */ +template +struct scalar_gamma_sample_der_alpha_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_gamma_sample_der_alpha_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& alpha, const Scalar& sample) const { + using numext::gamma_sample_der_alpha; + return gamma_sample_der_alpha(alpha, sample); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& alpha, const Packet& sample) const { + return internal::pgamma_sample_der_alpha(alpha, sample); + } +}; +template +struct functor_traits > { + enum { + // 2x the cost of igamma, minus the lgamma cost (the lgamma cancels out) + Cost = 30 * NumTraits::MulCost + 15 * NumTraits::AddCost, + PacketAccess = packet_traits::HasGammaSampleDerAlpha + }; +}; /** \internal * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h index c5867002e..fbdfd299e 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -33,6 +33,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::h template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igamma(static_cast(a), static_cast(x))); } +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma_der_a(const Eigen::half& a, const Eigen::half& x) { + return Eigen::half(Eigen::numext::igamma_der_a(static_cast(a), static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half gamma_sample_der_alpha(const Eigen::half& alpha, const Eigen::half& sample) { + return Eigen::half(Eigen::numext::gamma_sample_der_alpha(static_cast(alpha), static_cast(sample))); +} template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igammac(static_cast(a), static_cast(x))); } diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 6c7ac3f3b..b24df2a95 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -521,6 +521,198 @@ struct cephes_helper { } }; +enum IgammaComputationMode { VALUE, DERIVATIVE, SAMPLE_DERIVATIVE }; + +template +EIGEN_DEVICE_FUNC +int igamma_num_iterations() { + /* Returns the maximum number of internal iterations for igamma computation. + */ + if (mode == VALUE) { + return 2000; + } + + if (internal::is_same::value) { + return 200; + } else if (internal::is_same::value) { + return 500; + } else { + return 2000; + } +} + +template +struct igammac_cf_impl { + /* Computes igamc(a, x) or derivative (depending on the mode) + * using the continued fraction expansion of the complementary + * incomplete Gamma function. + * + * Preconditions: + * a > 0 + * x >= 1 + * x >= a + */ + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; + const Scalar two = 2; + const Scalar machep = cephes_helper::machep(); + const Scalar big = cephes_helper::big(); + const Scalar biginv = cephes_helper::biginv(); + + if ((numext::isinf)(x)) { + return zero; + } + + // continued fraction + Scalar y = one - a; + Scalar z = x + y + one; + Scalar c = zero; + Scalar pkm2 = one; + Scalar qkm2 = x; + Scalar pkm1 = x + one; + Scalar qkm1 = z * x; + Scalar ans = pkm1 / qkm1; + + Scalar dpkm2_da = zero; + Scalar dqkm2_da = zero; + Scalar dpkm1_da = zero; + Scalar dqkm1_da = -x; + Scalar dans_da = (dpkm1_da - ans * dqkm1_da) / qkm1; + + for (int i = 0; i < igamma_num_iterations(); i++) { + c += one; + y += one; + z += two; + + Scalar yc = y * c; + Scalar pk = pkm1 * z - pkm2 * yc; + Scalar qk = qkm1 * z - qkm2 * yc; + + Scalar dpk_da = dpkm1_da * z - pkm1 - dpkm2_da * yc + pkm2 * c; + Scalar dqk_da = dqkm1_da * z - qkm1 - dqkm2_da * yc + qkm2 * c; + + if (qk != zero) { + Scalar ans_prev = ans; + ans = pk / qk; + + Scalar dans_da_prev = dans_da; + dans_da = (dpk_da - ans * dqk_da) / qk; + + if (mode == VALUE) { + if (numext::abs(ans_prev - ans) <= machep * numext::abs(ans)) { + break; + } + } else { + if (numext::abs(dans_da - dans_da_prev) <= + machep * numext::abs(dans_da)) { + break; + } + } + } + + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + dpkm2_da = dpkm1_da; + dpkm1_da = dpk_da; + dqkm2_da = dqkm1_da; + dqkm1_da = dqk_da; + + if (numext::abs(pk) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + + dpkm2_da *= biginv; + dpkm1_da *= biginv; + dqkm2_da *= biginv; + dqkm1_da *= biginv; + } + } + + /* Compute x**a * exp(-x) / gamma(a) */ + Scalar logax = a * numext::log(x) - x - lgamma_impl::run(a); + Scalar dlogax_da = numext::log(x) - digamma_impl::run(a); + Scalar ax = numext::exp(logax); + Scalar dax_da = ax * dlogax_da; + + switch (mode) { + case VALUE: + return ans * ax; + case DERIVATIVE: + return ans * dax_da + dans_da * ax; + case SAMPLE_DERIVATIVE: + return -(dans_da + ans * dlogax_da) * x; + } + } +}; + +template +struct igamma_series_impl { + /* Computes igam(a, x) or its derivative (depending on the mode) + * using the series expansion of the incomplete Gamma function. + * + * Preconditions: + * x > 0 + * a > 0 + * !(x > 1 && x > a) + */ + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; + const Scalar machep = cephes_helper::machep(); + + /* power series */ + Scalar r = a; + Scalar c = one; + Scalar ans = one; + + Scalar dc_da = zero; + Scalar dans_da = zero; + + for (int i = 0; i < igamma_num_iterations(); i++) { + r += one; + Scalar term = x / r; + Scalar dterm_da = -x / (r * r); + dc_da = term * dc_da + dterm_da * c; + dans_da += dc_da; + c *= term; + ans += c; + + if (mode == VALUE) { + if (c <= machep * ans) { + break; + } + } else { + if (numext::abs(dc_da) <= machep * numext::abs(dans_da)) { + break; + } + } + } + + /* Compute x**a * exp(-x) / gamma(a + 1) */ + Scalar logax = a * numext::log(x) - x - lgamma_impl::run(a + one); + Scalar dlogax_da = numext::log(x) - digamma_impl::run(a + one); + Scalar ax = numext::exp(logax); + Scalar dax_da = ax * dlogax_da; + + switch (mode) { + case VALUE: + return ans * ax; + case DERIVATIVE: + return ans * dax_da + dans_da * ax; + case SAMPLE_DERIVATIVE: + return -(dans_da + ans * dlogax_da) * x / a; + } + } +}; + #if !EIGEN_HAS_C99_MATH template @@ -535,8 +727,6 @@ struct igammac_impl { #else -template struct igamma_impl; // predeclare igamma_impl - template struct igammac_impl { EIGEN_DEVICE_FUNC @@ -604,97 +794,15 @@ struct igammac_impl { return nan; } - if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans + if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans return nan; } if ((x < one) || (x < a)) { - /* The checks above ensure that we meet the preconditions for - * igamma_impl::Impl(), so call it, rather than igamma_impl::Run(). - * Calling Run() would also work, but in that case the compiler may not be - * able to prove that igammac_impl::Run and igamma_impl::Run are not - * mutually recursive. This leads to worse code, particularly on - * platforms like nvptx, where recursion is allowed only begrudgingly. - */ - return (one - igamma_impl::Impl(a, x)); - } - - return Impl(a, x); - } - - private: - /* igamma_impl calls igammac_impl::Impl. */ - friend struct igamma_impl; - - /* Actually computes igamc(a, x). - * - * Preconditions: - * a > 0 - * x >= 1 - * x >= a - */ - EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { - const Scalar zero = 0; - const Scalar one = 1; - const Scalar two = 2; - const Scalar machep = cephes_helper::machep(); - const Scalar maxlog = numext::log(NumTraits::highest()); - const Scalar big = cephes_helper::big(); - const Scalar biginv = cephes_helper::biginv(); - const Scalar inf = NumTraits::infinity(); - - Scalar ans, ax, c, yc, r, t, y, z; - Scalar pk, pkm1, pkm2, qk, qkm1, qkm2; - - if (x == inf) return zero; // std::isinf crashes on CUDA - - /* Compute x**a * exp(-x) / gamma(a) */ - ax = a * numext::log(x) - x - lgamma_impl::run(a); - if (ax < -maxlog) { // underflow - return zero; - } - ax = numext::exp(ax); - - // continued fraction - y = one - a; - z = x + y + one; - c = zero; - pkm2 = one; - qkm2 = x; - pkm1 = x + one; - qkm1 = z * x; - ans = pkm1 / qkm1; - - for (int i = 0; i < 2000; i++) { - c += one; - y += one; - z += two; - yc = y * c; - pk = pkm1 * z - pkm2 * yc; - qk = qkm1 * z - qkm2 * yc; - if (qk != zero) { - r = pk / qk; - t = numext::abs((ans - r) / r); - ans = r; - } else { - t = one; - } - pkm2 = pkm1; - pkm1 = pk; - qkm2 = qkm1; - qkm1 = qk; - if (numext::abs(pk) > big) { - pkm2 *= biginv; - pkm1 *= biginv; - qkm2 *= biginv; - qkm1 *= biginv; - } - if (t <= machep) { - break; - } + return (one - igamma_series_impl::run(a, x)); } - return (ans * ax); + return igammac_cf_impl::run(a, x); } }; @@ -704,15 +812,10 @@ struct igammac_impl { * Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 * ************************************************************************************************/ -template -struct igamma_retval { - typedef Scalar type; -}; - #if !EIGEN_HAS_C99_MATH -template -struct igamma_impl { +template +struct igamma_generic_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), @@ -723,69 +826,17 @@ struct igamma_impl { #else -template -struct igamma_impl { +template +struct igamma_generic_impl { EIGEN_DEVICE_FUNC static Scalar run(Scalar a, Scalar x) { - /* igam() - * Incomplete gamma integral - * - * - * - * SYNOPSIS: - * - * double a, x, y, igam(); - * - * y = igam( a, x ); - * - * DESCRIPTION: - * - * The function is defined by - * - * x - * - - * 1 | | -t a-1 - * igam(a,x) = ----- | e t dt. - * - | | - * | (a) - - * 0 - * - * - * In this implementation both arguments must be positive. - * The integral is evaluated by either a power series or - * continued fraction expansion, depending on the relative - * values of a and x. - * - * ACCURACY (double): - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 200000 3.6e-14 2.9e-15 - * IEEE 0,100 300000 9.9e-14 1.5e-14 - * - * - * ACCURACY (float): - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 20000 7.8e-6 5.9e-7 - * - */ - /* - Cephes Math Library Release 2.2: June, 1992 - Copyright 1985, 1987, 1992 by Stephen L. Moshier - Direct inquiries to 30 Frost Street, Cambridge, MA 02140 - */ - - - /* left tail of incomplete gamma function: - * - * inf. k - * a -x - x - * x e > ---------- - * - - - * k=0 | (a+k+1) + /* Depending on the mode, returns + * - VALUE: incomplete Gamma function igamma(a, x) + * - DERIVATIVE: derivative of incomplete Gamma function d/da igamma(a, x) + * - SAMPLE_DERIVATIVE: implicit derivative of a Gamma random variable + * x ~ Gamma(x | a, 1), dx/da = -1 / Gamma(x | a, 1) * d igamma(a, x) / dx * + * Derivatives are implemented by forward-mode differentiation. */ const Scalar zero = 0; const Scalar one = 1; @@ -797,71 +848,167 @@ struct igamma_impl { return nan; } - if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans + if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans return nan; } if ((x > one) && (x > a)) { - /* The checks above ensure that we meet the preconditions for - * igammac_impl::Impl(), so call it, rather than igammac_impl::Run(). - * Calling Run() would also work, but in that case the compiler may not be - * able to prove that igammac_impl::Run and igamma_impl::Run are not - * mutually recursive. This leads to worse code, particularly on - * platforms like nvptx, where recursion is allowed only begrudgingly. - */ - return (one - igammac_impl::Impl(a, x)); + Scalar ret = igammac_cf_impl::run(a, x); + if (mode == VALUE) { + return one - ret; + } else { + return -ret; + } } - return Impl(a, x); + return igamma_series_impl::run(a, x); } +}; - private: - /* igammac_impl calls igamma_impl::Impl. */ - friend struct igammac_impl; +#endif // EIGEN_HAS_C99_MATH - /* Actually computes igam(a, x). +template +struct igamma_retval { + typedef Scalar type; +}; + +template +struct igamma_impl : igamma_generic_impl { + /* igam() + * Incomplete gamma integral. + * + * The CDF of Gamma(a, 1) random variable at the point x. + * + * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points. + * The ground truth is computed by mpmath. Mean absolute error: + * float: 1.26713e-05 + * double: 2.33606e-12 + * + * Cephes documentation below. + * + * SYNOPSIS: + * + * double a, x, y, igam(); + * + * y = igam( a, x ); + * + * DESCRIPTION: + * + * The function is defined by + * + * x + * - + * 1 | | -t a-1 + * igam(a,x) = ----- | e t dt. + * - | | + * | (a) - + * 0 + * + * + * In this implementation both arguments must be positive. + * The integral is evaluated by either a power series or + * continued fraction expansion, depending on the relative + * values of a and x. + * + * ACCURACY (double): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 200000 3.6e-14 2.9e-15 + * IEEE 0,100 300000 9.9e-14 1.5e-14 + * + * + * ACCURACY (float): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 20000 7.8e-6 5.9e-7 * - * Preconditions: - * x > 0 - * a > 0 - * !(x > 1 && x > a) */ - EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { - const Scalar zero = 0; - const Scalar one = 1; - const Scalar machep = cephes_helper::machep(); - const Scalar maxlog = numext::log(NumTraits::highest()); + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ - Scalar ans, ax, c, r; + /* left tail of incomplete gamma function: + * + * inf. k + * a -x - x + * x e > ---------- + * - - + * k=0 | (a+k+1) + * + */ +}; - /* Compute x**a * exp(-x) / gamma(a) */ - ax = a * numext::log(x) - x - lgamma_impl::run(a); - if (ax < -maxlog) { - // underflow - return zero; - } - ax = numext::exp(ax); +template +struct igamma_der_a_retval : igamma_retval {}; - /* power series */ - r = a; - c = one; - ans = one; +template +struct igamma_der_a_impl : igamma_generic_impl { + /* Derivative of the incomplete Gamma function with respect to a. + * + * Computes d/da igamma(a, x) by forward differentiation of the igamma code. + * + * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points. + * The ground truth is computed by mpmath. Mean absolute error: + * float: 6.27648e-07 + * double: 4.60455e-12 + * + * Reference: + * R. Moore. "Algorithm AS 187: Derivatives of the incomplete gamma + * integral". Journal of the Royal Statistical Society. 1982 + */ +}; - for (int i = 0; i < 2000; i++) { - r += one; - c *= x/r; - ans += c; - if (c/ans <= machep) { - break; - } - } +template +struct gamma_sample_der_alpha_retval : igamma_retval {}; - return (ans * ax / a); - } +template +struct gamma_sample_der_alpha_impl + : igamma_generic_impl { + /* Derivative of a Gamma random variable sample with respect to alpha. + * + * Consider a sample of a Gamma random variable with the concentration + * parameter alpha: sample ~ Gamma(alpha, 1). The reparameterization + * derivative that we want to compute is dsample / dalpha = + * d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample). + * However, this formula is numerically unstable and expensive, so instead + * we use implicit differentiation: + * + * igamma(alpha, sample) = u, where u ~ Uniform(0, 1). + * Apply d / dalpha to both sides: + * d igamma(alpha, sample) / dalpha + * + d igamma(alpha, sample) / dsample * dsample/dalpha = 0 + * d igamma(alpha, sample) / dalpha + * + Gamma(sample | alpha, 1) dsample / dalpha = 0 + * dsample/dalpha = - (d igamma(alpha, sample) / dalpha) + * / Gamma(sample | alpha, 1) + * + * Here Gamma(sample | alpha, 1) is the PDF of the Gamma distribution + * (note that the derivative of the CDF w.r.t. sample is the PDF). + * See the reference below for more details. + * + * The derivative of igamma(alpha, sample) is computed by forward + * differentiation of the igamma code. Division by the Gamma PDF is performed + * in the same code, increasing the accuracy and speed due to cancellation + * of some terms. + * + * Accuracy estimation. For each alpha in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables sample ~ Gamma(sample | alpha, 1), a total of 300 + * points. The ground truth is computed by mpmath. Mean absolute error: + * float: 1.0993e-06 + * double: 1.47631e-12 + * + * Reference: + * M. Figurnov, S. Mohamed, A. Mnih "Implicit Reparameterization Gradients". + * 2018 + */ }; -#endif // EIGEN_HAS_C99_MATH - /***************************************************************************** * Implementation of Riemann zeta function of two arguments, based on Cephes * *****************************************************************************/ @@ -1950,6 +2097,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma_der_a, Scalar) + igamma_der_a(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igamma_der_a, Scalar)::run(a, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(gamma_sample_der_alpha, Scalar) + gamma_sample_der_alpha(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(gamma_sample_der_alpha, Scalar)::run(a, x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) igammac(const Scalar& a, const Scalar& x) { diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 4c176716b..465f41d54 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -42,6 +42,21 @@ Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } +/** \internal \returns the derivative of the incomplete gamma function + * igamma_der_a(\a a, \a x) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma_der_a(const Packet& a, const Packet& x) { + using numext::igamma_der_a; return igamma_der_a(a, x); +} + +/** \internal \returns compute the derivative of the sample + * of Gamma(alpha, 1) random variable with respect to the parameter a + * gamma_sample_der_alpha(\a alpha, \a sample) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pgamma_sample_der_alpha(const Packet& alpha, const Packet& sample) { + using numext::gamma_sample_der_alpha; return gamma_sample_der_alpha(alpha, sample); +} + /** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h index c25fea0b3..020ac1b62 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h @@ -120,6 +120,41 @@ double2 pigamma(const double2& a, const double2& x) return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma_der_a( + const float4& a, const float4& x) { + using numext::igamma_der_a; + return make_float4(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y), + igamma_der_a(a.z, x.z), igamma_der_a(a.w, x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pigamma_der_a(const double2& a, const double2& x) { + using numext::igamma_der_a; + return make_double2(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pgamma_sample_der_alpha( + const float4& alpha, const float4& sample) { + using numext::gamma_sample_der_alpha; + return make_float4( + gamma_sample_der_alpha(alpha.x, sample.x), + gamma_sample_der_alpha(alpha.y, sample.y), + gamma_sample_der_alpha(alpha.z, sample.z), + gamma_sample_der_alpha(alpha.w, sample.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pgamma_sample_der_alpha(const double2& alpha, const double2& sample) { + using numext::gamma_sample_der_alpha; + return make_double2( + gamma_sample_der_alpha(alpha.x, sample.x), + gamma_sample_der_alpha(alpha.y, sample.y)); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigammac(const float4& a, const float4& x) { diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 63d0a345a..f238ed5be 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -1318,6 +1318,157 @@ void test_cuda_i1e() cudaFree(d_out); } +template +void test_cuda_igamma_der_a() +{ + Tensor in_x(30); + Tensor in_a(30); + Tensor out(30); + Tensor expected_out(30); + out.setZero(); + + Array in_a_array(30); + Array in_x_array(30); + Array expected_out_array(30); + + // See special_functions.cpp for the Python code that generates the test data. + + in_a_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, + 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0, + 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0; + + in_x_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05, + 1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065, + 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288, + 1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458, + 7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233, + 92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677, + 968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568; + + expected_out_array << -32.7256441441, -36.4394150514, -9.66467612263, + -36.4394150514, -36.4394150514, -1.0891900302, -2.66351229645, + -2.48666868596, -0.929700494428, -3.56327722764, -0.455320135314, + -0.391437214323, -0.491352055991, -0.350454834292, -0.471773162921, + -0.104084440522, -0.0723646747909, -0.0992828975532, -0.121638215446, + -0.122619605294, -0.0317670267286, -0.0359974812869, -0.0154359225363, + -0.0375775365921, -0.00794899153653, -0.00777303219211, -0.00796085782042, + -0.0125850719397, -0.00455500206958, -0.00476436993148; + + for (int i = 0; i < 30; ++i) { + in_x(i) = in_x_array(i); + in_a(i) = in_a_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, in_a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_a(d_a, 30); + Eigen::TensorMap > gpu_x(d_x, 30); + Eigen::TensorMap > gpu_out(d_out, 30); + + gpu_out.device(gpu_device) = gpu_a.igamma_der_a(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 30; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); +} + +template +void test_cuda_gamma_sample_der_alpha() +{ + Tensor in_alpha(30); + Tensor in_sample(30); + Tensor out(30); + Tensor expected_out(30); + out.setZero(); + + Array in_alpha_array(30); + Array in_sample_array(30); + Array expected_out_array(30); + + // See special_functions.cpp for the Python code that generates the test data. + + in_alpha_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, + 1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, + 100.0, 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0; + + in_sample_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05, + 1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065, + 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288, + 1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458, + 7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233, + 92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677, + 968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568; + + expected_out_array << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738, + 1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243, + 0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302, + 1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534, + 0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812, + 1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061, + 0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206, + 1.00106492525, 0.97734200649, 1.02198794179; + + for (int i = 0; i < 30; ++i) { + in_alpha(i) = in_alpha_array(i); + in_sample(i) = in_sample_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_alpha.size() * sizeof(Scalar); + + Scalar* d_alpha; + Scalar* d_sample; + Scalar* d_out; + cudaMalloc((void**)(&d_alpha), bytes); + cudaMalloc((void**)(&d_sample), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_alpha, in_alpha.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_sample, in_sample.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_alpha(d_alpha, 30); + Eigen::TensorMap > gpu_sample(d_sample, 30); + Eigen::TensorMap > gpu_out(d_out, 30); + + gpu_out.device(gpu_device) = gpu_alpha.gamma_sample_der_alpha(gpu_sample); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 30; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_alpha); + cudaFree(d_sample); + cudaFree(d_out); +} void test_cxx11_tensor_cuda() { @@ -1396,5 +1547,11 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_6(test_cuda_i1e()); CALL_SUBTEST_6(test_cuda_i1e()); + + CALL_SUBTEST_6(test_cuda_igamma_der_a()); + CALL_SUBTEST_6(test_cuda_igamma_der_a()); + + CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha()); + CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha()); #endif } diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 48d0db95e..29ba6203a 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -375,6 +375,98 @@ template void array_special_functions() CALL_SUBTEST(res = i1e(x); verify_component_wise(res, expected);); } + + /* Code to generate the data for the following two test cases. + N = 5 + np.random.seed(3) + + a = np.logspace(-2, 3, 6) + a = np.ravel(np.tile(np.reshape(a, [-1, 1]), [1, N])) + x = np.random.gamma(a, 1.0) + x = np.maximum(x, np.finfo(np.float32).tiny) + + def igamma(a, x): + return mpmath.gammainc(a, 0, x, regularized=True) + + def igamma_der_a(a, x): + res = mpmath.diff(lambda a_prime: igamma(a_prime, x), a) + return np.float64(res) + + def gamma_sample_der_alpha(a, x): + igamma_x = igamma(a, x) + def igammainv_of_igamma(a_prime): + return mpmath.findroot(lambda x_prime: igamma(a_prime, x_prime) - + igamma_x, x, solver='newton') + return np.float64(mpmath.diff(igammainv_of_igamma, a)) + + v_igamma_der_a = np.vectorize(igamma_der_a)(a, x) + v_gamma_sample_der_alpha = np.vectorize(gamma_sample_der_alpha)(a, x) + */ + + // Test igamma_der_a + { + ArrayType a(30); + ArrayType x(30); + ArrayType res(30); + ArrayType v(30); + + a << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, 1.0, + 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0, + 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0; + + x << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05, + 1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, + 0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, + 0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426, + 0.786686768458, 7.63873279537, 13.1944344379, 11.896042354, + 10.5830172417, 10.5020942233, 92.8918587747, 95.003720371, + 86.3715926467, 96.0330217672, 82.6389930677, 968.702906754, + 969.463546828, 1001.79726022, 955.047416547, 1044.27458568; + + v << -32.7256441441, -36.4394150514, -9.66467612263, -36.4394150514, + -36.4394150514, -1.0891900302, -2.66351229645, -2.48666868596, + -0.929700494428, -3.56327722764, -0.455320135314, -0.391437214323, + -0.491352055991, -0.350454834292, -0.471773162921, -0.104084440522, + -0.0723646747909, -0.0992828975532, -0.121638215446, -0.122619605294, + -0.0317670267286, -0.0359974812869, -0.0154359225363, -0.0375775365921, + -0.00794899153653, -0.00777303219211, -0.00796085782042, + -0.0125850719397, -0.00455500206958, -0.00476436993148; + + CALL_SUBTEST(res = igamma_der_a(a, x); verify_component_wise(res, v);); + } + + // Test gamma_sample_der_alpha + { + ArrayType alpha(30); + ArrayType sample(30); + ArrayType res(30); + ArrayType v(30); + + alpha << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, + 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0, + 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0; + + sample << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05, + 1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, + 0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, + 0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426, + 0.786686768458, 7.63873279537, 13.1944344379, 11.896042354, + 10.5830172417, 10.5020942233, 92.8918587747, 95.003720371, + 86.3715926467, 96.0330217672, 82.6389930677, 968.702906754, + 969.463546828, 1001.79726022, 955.047416547, 1044.27458568; + + v << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738, + 1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243, + 0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302, + 1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534, + 0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812, + 1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061, + 0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206, + 1.00106492525, 0.97734200649, 1.02198794179; + + CALL_SUBTEST(res = gamma_sample_der_alpha(alpha, sample); + verify_component_wise(res, v);); + } #endif } -- cgit v1.2.3 From b3fd93207bea08fc459be46f84fd6bbe18b19523 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 7 Jun 2018 14:43:02 +0200 Subject: Fix typos found using codespell --- Eigen/src/Core/Solve.h | 2 +- Eigen/src/Core/products/Parallelizer.h | 2 +- Eigen/src/IterativeLinearSolvers/SolveWithGuess.h | 2 +- Eigen/src/SparseCholesky/SimplicialCholesky_impl.h | 2 +- test/indexed_view.cpp | 2 +- unsupported/Eigen/CXX11/src/Tensor/README.md | 2 +- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorContractionBlocking.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 12 ++++++------ unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorRef.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h | 8 ++++---- unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h | 2 +- .../CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h | 2 +- unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h | 8 ++++---- unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h | 2 +- unsupported/Eigen/CXX11/src/util/EmulateArray.h | 2 +- unsupported/Eigen/NonLinearOptimization | 4 ++-- unsupported/Eigen/OpenGLSupport | 2 +- unsupported/Eigen/src/BVH/KdBVH.h | 2 +- .../Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h | 2 +- unsupported/Eigen/src/EulerAngles/EulerSystem.h | 2 +- .../Eigen/src/IterativeSolvers/ConstrainedConjGrad.h | 2 +- unsupported/Eigen/src/IterativeSolvers/DGMRES.h | 4 ++-- unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h | 2 +- .../Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h | 4 ++-- .../Eigen/src/MatrixFunctions/MatrixExponential.h | 2 +- unsupported/Eigen/src/MatrixFunctions/MatrixPower.h | 2 +- unsupported/Eigen/src/NonLinearOptimization/qrsolv.h | 2 +- unsupported/Eigen/src/NonLinearOptimization/r1updt.h | 2 +- unsupported/Eigen/src/Polynomials/Companion.h | 4 ++-- unsupported/Eigen/src/Skyline/SkylineInplaceLU.h | 2 +- unsupported/Eigen/src/Skyline/SkylineMatrix.h | 16 ++++++++-------- unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h | 8 ++++---- unsupported/Eigen/src/SparseExtra/MarketIO.h | 2 +- unsupported/Eigen/src/Splines/SplineFitting.h | 2 +- unsupported/README.txt | 2 +- unsupported/bench/bench_svd.cpp | 2 +- unsupported/test/CMakeLists.txt | 2 +- unsupported/test/autodiff_scalar.cpp | 2 +- unsupported/test/cxx11_tensor_inflation_sycl.cpp | 4 ++-- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 2 +- unsupported/test/cxx11_tensor_random_cuda.cu | 2 +- unsupported/test/forward_adolc.cpp | 2 +- unsupported/test/sparse_extra.cpp | 2 +- 54 files changed, 84 insertions(+), 84 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index a8daea511..2bf940a26 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -181,7 +181,7 @@ struct Assignment, interna } }; -} // end namepsace internal +} // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index 84a1bf2bd..0aa92f8bc 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h @@ -5,7 +5,7 @@ /* -NOTE: thes functions vave been adapted from the LDL library: +NOTE: these functions have been adapted from the LDL library: LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved. diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp index 033d8833f..2d46ffd6b 100644 --- a/test/indexed_view.cpp +++ b/test/indexed_view.cpp @@ -140,7 +140,7 @@ void check_indexed_view() "500 501 502 503 504 505 506 507 508 509") ); - // takes the row numer 3, and repeat it 5 times + // take row number 3, and repeat it 5 times VERIFY( MATCH( A(seqN(3,5,0), all), "300 301 302 303 304 305 306 307 308 309\n" "300 301 302 303 304 305 306 307 308 309\n" diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index 49cc33c5d..dfd7ab7c7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -581,7 +581,7 @@ is not initialized. Creates a tensor mapping an existing array of data. The data must not be freed until the TensorMap is discarded, and the size of the data must be large enough -to accomodate of the coefficients of the tensor. +to accommodate the coefficients of the tensor. float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; Eigen::TensorMap> a(data, 3, 4); diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 1940a9692..4fd96448f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -48,7 +48,7 @@ namespace Eigen { * *
*
Relation to other parts of Eigen:
- *
The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that + *
The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor * class does not provide any of these features and is only available as a stand-alone class that just allows for diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index a942c98dd..d88e0df71 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -20,7 +20,7 @@ namespace Eigen { * \brief The tensor base class. * * This class is the common parent of the Tensor and TensorMap class, thus - * making it possible to use either class interchangably in expressions. + * making it possible to use either class interchangeably in expressions. */ template diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index d34f9caee..639d99f9d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -75,7 +75,7 @@ class TensorXsmmContractionBlocking { outer_n_ = outer_n_ != 0 ? outer_n_ : n; } #else - // Defaults, possibly overriden per-platform. + // Defaults, possibly overridden per-platform. copyA_ = true; copyB_ = false; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index d30cc96ab..3c007b183 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -350,7 +350,7 @@ struct TensorEvaluator(m_queue.get_device(). template get_info()); auto s= m_queue.get_device().template get_info(); std::transform(s.begin(), s.end(), s.begin(), ::tolower); - if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size + if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size tileSize=std::min(static_cast(256), static_cast(tileSize)); } rng = n; @@ -303,7 +303,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { template EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1, Index &tileSize0, Index &tileSize1, Index &rng0, Index &rng1, Index &GRange0, Index &GRange1) const { Index max_workgroup_Size = static_cast(maxSyclThreadsPerBlock()); - if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size + if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size max_workgroup_Size=std::min(static_cast(256), static_cast(max_workgroup_Size)); } Index pow_of_2 = static_cast(std::log2(max_workgroup_Size)); @@ -331,7 +331,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { template EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1,Index dim2, Index &tileSize0, Index &tileSize1, Index &tileSize2, Index &rng0, Index &rng1, Index &rng2, Index &GRange0, Index &GRange1, Index &GRange2) const { Index max_workgroup_Size = static_cast(maxSyclThreadsPerBlock()); - if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size + if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size max_workgroup_Size=std::min(static_cast(256), static_cast(max_workgroup_Size)); } Index pow_of_2 = static_cast(std::log2(max_workgroup_Size)); @@ -377,7 +377,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { - // OpenCL doesnot have such concept + // OpenCL doesn't have such concept return 2; } @@ -519,7 +519,7 @@ struct SyclDevice { return m_queue_stream->maxSyclThreadsPerBlock(); } EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { - // OpenCL doesnot have such concept + // OpenCL doesn't have such concept return m_queue_stream->maxSyclThreadsPerMultiProcessor(); // return stream_->deviceProperties().maxThreadsPerMultiProcessor; } @@ -544,7 +544,7 @@ struct SyclDevice { }; // This is used as a distingushable device inside the kernel as the sycl device class is not Standard layout. // This is internal and must not be used by user. This dummy device allow us to specialise the tensor evaluator -// inside the kenrel. So we can have two types of eval for host and device. This is required for TensorArgMax operation +// inside the kernel. So we can have two types of eval for host and device. This is required for TensorArgMax operation struct SyclKernelDevice:DefaultDevice{}; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index f81da318c..d6ab4d997 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -274,7 +274,7 @@ struct TensorEvaluator, D } } - // processs the line + // process the line if (is_power_of_two) { processDataLineCooleyTukey(line_buf, line_len, log_len); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 354bbe8d1..6c237bac3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,7 +12,7 @@ namespace Eigen { -// MakePointer class is used as a container of the adress space of the pointer +// MakePointer class is used as a container of the address space of the pointer // on the host and on the device. From the host side it generates the T* pointer // and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to // T* m_data on the host. It is always called on the device. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 91d4ead28..f0f7c7826 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -272,8 +272,8 @@ struct TensorEvaluator, Device> break; default: eigen_assert(false && "unexpected padding"); - m_outputCols=0; // silence the uninitialised warnig; - m_outputRows=0; //// silence the uninitialised warnig; + m_outputCols=0; // silence the uninitialised warning; + m_outputRows=0; //// silence the uninitialised warning; } } eigen_assert(m_outputRows > 0); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index bf5c532ff..25ba2001e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -167,7 +167,7 @@ struct TensorIntDivisor { shift2 = log_div > 1 ? log_div-1 : 0; } - // Must have 0 <= numerator. On platforms that dont support the __uint128_t + // Must have 0 <= numerator. On platforms that don't support the __uint128_t // type numerator should also be less than 2^32-1. EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { eigen_assert(static_cast::type>(numerator) < NumTraits::highest()/2); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index 94899252b..a379f5a94 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -106,7 +106,7 @@ struct FullReducer { /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one. if (GRange < outTileSize) outTileSize=GRange; /// creating the shared memory for calculating reduction. - /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can + /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can /// recursively apply reduction on it in order to reduce the whole. auto temp_global_buffer =cl::sycl::buffer(cl::sycl::range<1>(GRange)); typedef typename Eigen::internal::remove_all::type Dims; @@ -150,7 +150,7 @@ struct InnerReducer { // getting final out buffer at the moment the created buffer is true because there is no need for assign /// creating the shared memory for calculating reduction. - /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can + /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can /// recursively apply reduction on it in order to reduce the whole. dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h index 99245f778..b2b4fd8d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h @@ -31,7 +31,7 @@ class TensorLazyBaseEvaluator { int refCount() const { return m_refcount; } private: - // No copy, no assigment; + // No copy, no assignment; TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h index a7905706d..a248e303b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -117,7 +117,7 @@ SYCLEXTRFUNCTERNARY() -//TensorCustomOp must be specialised otherewise it will be captured by UnaryCategory while its action is different +//TensorCustomOp must be specialised otherwise it will be captured by UnaryCategory while its action is different //from the UnaryCategory and it is similar to the general FunctorExtractor. /// specialisation of TensorCustomOp #define SYCLEXTRFUNCCUSTOMUNARYOP(CVQual)\ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h index e5b892f2e..a447c3f88 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h @@ -80,7 +80,7 @@ template < typename HostExpr, typename FunctorExpr, typename Tuple_of_Acc, typen typedef typename ConvertToDeviceExpression::Type DevExpr; auto device_expr = createDeviceExpression(functors, tuple_of_accessors); /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. const auto device_self_expr= Eigen::TensorReductionOp(device_expr.expr, dims, functor); /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is @@ -121,7 +121,7 @@ class ReductionFunctor::Type DevExpr; auto device_expr = createDeviceExpression(functors, tuple_of_accessors); /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. const auto device_self_expr= Eigen::TensorReductionOp(device_expr.expr, dims, functor); /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is @@ -168,7 +168,7 @@ public: typedef typename TensorSycl::internal::ConvertToDeviceExpression::Type DevExpr; auto device_expr = TensorSycl::internal::createDeviceExpression(functors, tuple_of_accessors); /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. const auto device_self_expr= Eigen::TensorReductionOp(device_expr.expr, dims, op); /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is @@ -215,7 +215,7 @@ public: typedef typename TensorSycl::internal::ConvertToDeviceExpression::Type DevExpr; auto device_expr = TensorSycl::internal::createDeviceExpression(functors, tuple_of_accessors); /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the + /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. const auto device_self_expr= Eigen::TensorReductionOp(device_expr.expr, dims, op); /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h index 58ab0f0d5..9e6c3e4fa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h @@ -143,7 +143,7 @@ struct IndexList {}; /// \brief Collects internal details for generating index ranges [MIN, MAX) /// Declare primary template for index range builder /// \tparam MIN is the starting index in the tuple -/// \tparam N represents sizeof..(elemens)- sizeof...(Is) +/// \tparam N represents sizeof..(elements)- sizeof...(Is) /// \tparam Is... are the list of generated index so far template struct RangeBuilder; @@ -161,7 +161,7 @@ struct RangeBuilder { /// in this case we are recursively subtracting N by one and adding one /// index to Is... list until MIN==N /// \tparam MIN is the starting index in the tuple -/// \tparam N represents sizeof..(elemens)- sizeof...(Is) +/// \tparam N represents sizeof..(elements)- sizeof...(Is) /// \tparam Is... are the list of generated index so far template struct RangeBuilder : public RangeBuilder {}; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index 51c099591..ef199bfb6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -568,7 +568,7 @@ struct TensorEvaluator, D Dimensions m_dimensions; - // Parameters passed to the costructor. + // Parameters passed to the constructor. Index m_plane_strides; Index m_row_strides; Index m_col_strides; diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h index 0fe0b7c46..04d6d6b23 100644 --- a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +++ b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h @@ -241,7 +241,7 @@ struct dimino_first_step_elements * multiplying all elements in the given subgroup with the new * coset representative. Note that the first element of the * subgroup is always the identity element, so the first element of - * ther result of this template is going to be the coset + * the result of this template is going to be the coset * representative itself. * * Note that this template accepts an additional boolean parameter diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h index 71d55552d..0a7181102 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h @@ -33,10 +33,10 @@ namespace Eigen { // ec.Notify(true); // // Notify is cheap if there are no waiting threads. Prewait/CommitWait are not -// cheap, but they are executed only if the preceeding predicate check has +// cheap, but they are executed only if the preceding predicate check has // failed. // -// Algorihtm outline: +// Algorithm outline: // There are two main variables: predicate (managed by user) and state_. // Operation closely resembles Dekker mutual algorithm: // https://en.wikipedia.org/wiki/Dekker%27s_algorithm @@ -79,7 +79,7 @@ class EventCount { uint64_t state = state_.load(std::memory_order_seq_cst); for (;;) { if (int64_t((state & kEpochMask) - epoch) < 0) { - // The preceeding waiter has not decided on its fate. Wait until it + // The preceding waiter has not decided on its fate. Wait until it // calls either CancelWait or CommitWait, or is notified. EIGEN_THREAD_YIELD(); state = state_.load(std::memory_order_seq_cst); @@ -110,7 +110,7 @@ class EventCount { uint64_t state = state_.load(std::memory_order_relaxed); for (;;) { if (int64_t((state & kEpochMask) - epoch) < 0) { - // The preceeding waiter has not decided on its fate. Wait until it + // The preceding waiter has not decided on its fate. Wait until it // calls either CancelWait or CommitWait, or is notified. EIGEN_THREAD_YIELD(); state = state_.load(std::memory_order_relaxed); diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h index 49d0cdc36..cb3690a2e 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h @@ -198,7 +198,7 @@ class RunQueue { }; std::mutex mutex_; // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of - // front/back, repsectively. The remaining bits contain modification counters + // front/back, respectively. The remaining bits contain modification counters // that are incremented on Push operations. This allows us to (1) distinguish // between empty and full conditions (if we would use log(kSize) bits for // position, these conditions would be indistinguishable); (2) obtain diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h index 96b3a8261..ddd54f4b3 100644 --- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h +++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h @@ -219,7 +219,7 @@ template struct array_size& > { #else -// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen::array +// The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array #include namespace Eigen { diff --git a/unsupported/Eigen/NonLinearOptimization b/unsupported/Eigen/NonLinearOptimization index 600ab4c12..c22b89054 100644 --- a/unsupported/Eigen/NonLinearOptimization +++ b/unsupported/Eigen/NonLinearOptimization @@ -35,7 +35,7 @@ * a zero for the system (Powell hybrid "dogleg" method). * * This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK). - * Minpack is a very famous, old, robust and well-reknown package, written in + * Minpack is a very famous, old, robust and well renowned package, written in * fortran. Those implementations have been carefully tuned, tested, and used * for several decades. * @@ -63,7 +63,7 @@ * Other tests were added by myself at the very beginning of the * process and check the results for levenberg-marquardt using the reference data * on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've - * carefully checked that the same results were obtained when modifiying the + * carefully checked that the same results were obtained when modifying the * code. Please note that we do not always get the exact same decimals as they do, * but this is ok : they use 128bits float, and we do the tests using the C type 'double', * which is 64 bits on most platforms (x86 and amd64, at least). diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport index 87f50947d..11d99567e 100644 --- a/unsupported/Eigen/OpenGLSupport +++ b/unsupported/Eigen/OpenGLSupport @@ -25,7 +25,7 @@ namespace Eigen { * * This module provides wrapper functions for a couple of OpenGL functions * which simplify the way to pass Eigen's object to openGL. - * Here is an exmaple: + * Here is an example: * * \code * // You need to add path_to_eigen/unsupported to your include path. diff --git a/unsupported/Eigen/src/BVH/KdBVH.h b/unsupported/Eigen/src/BVH/KdBVH.h index 1b8d75865..13f792cd0 100644 --- a/unsupported/Eigen/src/BVH/KdBVH.h +++ b/unsupported/Eigen/src/BVH/KdBVH.h @@ -170,7 +170,7 @@ private: typedef internal::vector_int_pair VIPair; typedef std::vector > VIPairList; typedef Matrix VectorType; - struct VectorComparator //compares vectors, or, more specificall, VIPairs along a particular dimension + struct VectorComparator //compares vectors, or more specifically, VIPairs along a particular dimension { VectorComparator(int inDim) : dim(inDim) {} inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; } diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h index 866a8a460..9f7bff764 100644 --- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h @@ -300,7 +300,7 @@ public: /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was succesful, \c NoConvergence otherwise. + * \returns \c Success if computation was successful, \c NoConvergence otherwise. */ ComputationInfo info() const { diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h index 28f52da61..65c2e94c7 100644 --- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h +++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h @@ -12,7 +12,7 @@ namespace Eigen { - // Forward declerations + // Forward declarations template class EulerAngles; diff --git a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h index dc0093eb9..37d5b4c6c 100644 --- a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +++ b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h @@ -99,7 +99,7 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV) /** \ingroup IterativeSolvers_Module * Constrained conjugate gradient * - * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the contraint \f$ Cx \le f \f$ + * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the constraint \f$ Cx \le f \f$ */ template diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index d603ba336..5cfe49302 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -214,7 +214,7 @@ class DGMRES : public IterativeSolverBase > void dgmresInitDeflation(Index& rows) const; mutable DenseMatrix m_V; // Krylov basis vectors mutable DenseMatrix m_H; // Hessenberg matrix - mutable DenseMatrix m_Hes; // Initial hessenberg matrix wihout Givens rotations applied + mutable DenseMatrix m_Hes; // Initial hessenberg matrix without Givens rotations applied mutable Index m_restart; // Maximum size of the Krylov subspace mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) @@ -250,7 +250,7 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh m_H.resize(m_restart+1, m_restart); m_Hes.resize(m_restart, m_restart); m_V.resize(n,m_restart+1); - //Initial residual vector and intial norm + //Initial residual vector and initial norm x = precond.solve(x); r0 = rhs - mat * x; RealScalar beta = r0.norm(); diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h index ae9d793b1..123485817 100644 --- a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +++ b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h @@ -73,7 +73,7 @@ void lmqrsolv( qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; wa[k] = temp; - /* accumulate the tranformation in the row of s. */ + /* accumulate the transformation in the row of s. */ for (i = k+1; i matrix_exp_pade17(A, U, V); } -#elif LDBL_MANT_DIG <= 112 // quadruple precison +#elif LDBL_MANT_DIG <= 112 // quadruple precision if (l1norm < 1.639394610288918690547467954466970e-005L) { matrix_exp_pade3(arg, U, V); diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h index ebc433d89..33609aea9 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h @@ -81,7 +81,7 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen * * \note Currently this class is only used by MatrixPower. One may * insist that this be nested into MatrixPower. This class is here to - * faciliate future development of triangular matrix functions. + * facilitate future development of triangular matrix functions. */ template class MatrixPowerAtomic : internal::noncopyable diff --git a/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h b/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h index feafd62a8..4f2f560b3 100644 --- a/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +++ b/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h @@ -61,7 +61,7 @@ void qrsolv( qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; wa[k] = temp; - /* accumulate the tranformation in the row of s. */ + /* accumulate the transformation in the row of s. */ for (i = k+1; i givens; - // r1updt had a broader usecase, but we dont use it here. And, more + // r1updt had a broader usecase, but we don't use it here. And, more // importantly, we can not test it. eigen_assert(m==n); eigen_assert(u.size()==m); diff --git a/unsupported/Eigen/src/Polynomials/Companion.h b/unsupported/Eigen/src/Polynomials/Companion.h index e0af6ebe4..41a4efc2f 100644 --- a/unsupported/Eigen/src/Polynomials/Companion.h +++ b/unsupported/Eigen/src/Polynomials/Companion.h @@ -104,7 +104,7 @@ class companion /** Helper function for the balancing algorithm. * \returns true if the row and the column, having colNorm and rowNorm * as norms, are balanced, false otherwise. - * colB and rowB are repectively the multipliers for + * colB and rowB are respectively the multipliers for * the column and the row in order to balance them. * */ bool balanced( RealScalar colNorm, RealScalar rowNorm, @@ -113,7 +113,7 @@ class companion /** Helper function for the balancing algorithm. * \returns true if the row and the column, having colNorm and rowNorm * as norms, are balanced, false otherwise. - * colB and rowB are repectively the multipliers for + * colB and rowB are respectively the multipliers for * the column and the row in order to balance them. * */ bool balancedR( RealScalar colNorm, RealScalar rowNorm, diff --git a/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h b/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h index a1f54ed35..bda057a85 100644 --- a/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +++ b/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h @@ -41,7 +41,7 @@ public: /** Sets the relative threshold value used to prune zero coefficients during the decomposition. * - * Setting a value greater than zero speeds up computation, and yields to an imcomplete + * Setting a value greater than zero speeds up computation, and yields to an incomplete * factorization with fewer non zero coefficients. Such approximate factors are especially * useful to initialize an iterative solver. * diff --git a/unsupported/Eigen/src/Skyline/SkylineMatrix.h b/unsupported/Eigen/src/Skyline/SkylineMatrix.h index a2a8933ca..f77d79a04 100644 --- a/unsupported/Eigen/src/Skyline/SkylineMatrix.h +++ b/unsupported/Eigen/src/Skyline/SkylineMatrix.h @@ -206,26 +206,26 @@ public: if (col > row) //upper matrix { const Index minOuterIndex = inner - m_data.upperProfile(inner); - eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); } if (col < row) //lower matrix { const Index minInnerIndex = outer - m_data.lowerProfile(outer); - eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); } } else { if (outer > inner) //upper matrix { const Index maxOuterIndex = inner + m_data.upperProfile(inner); - eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); } if (outer < inner) //lower matrix { const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); } } @@ -300,11 +300,11 @@ public: if (IsRowMajor) { const Index minInnerIndex = outer - m_data.lowerProfile(outer); - eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); } else { const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); } } @@ -336,11 +336,11 @@ public: if (IsRowMajor) { const Index minOuterIndex = inner - m_data.upperProfile(inner); - eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); } else { const Index maxOuterIndex = inner + m_data.upperProfile(inner); - eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); + eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage"); return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); } } diff --git a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h index 037a13f86..3f1ff14ad 100644 --- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h @@ -187,7 +187,7 @@ template /** Does nothing: provided for compatibility with SparseMatrix */ inline void finalize() {} - /** Suppress all nonzeros which are smaller than \a reference under the tolerence \a epsilon */ + /** Suppress all nonzeros which are smaller than \a reference under the tolerance \a epsilon */ void prune(Scalar reference, RealScalar epsilon = NumTraits::dummy_precision()) { for (Index j=0; j } } - /** The class DynamicSparseMatrix is deprectaed */ + /** The class DynamicSparseMatrix is deprecated */ EIGEN_DEPRECATED inline DynamicSparseMatrix() : m_innerSize(0), m_data(0) { eigen_assert(innerSize()==0 && outerSize()==0); } - /** The class DynamicSparseMatrix is deprectaed */ + /** The class DynamicSparseMatrix is deprecated */ EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols) : m_innerSize(0) { resize(rows, cols); } - /** The class DynamicSparseMatrix is deprectaed */ + /** The class DynamicSparseMatrix is deprecated */ template EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase& other) : m_innerSize(0) diff --git a/unsupported/Eigen/src/SparseExtra/MarketIO.h b/unsupported/Eigen/src/SparseExtra/MarketIO.h index 6d57ab2e9..1618b09a8 100644 --- a/unsupported/Eigen/src/SparseExtra/MarketIO.h +++ b/unsupported/Eigen/src/SparseExtra/MarketIO.h @@ -104,7 +104,7 @@ namespace internal out << value.real << " " << value.imag()<< "\n"; } -} // end namepsace internal +} // end namespace internal inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector) { diff --git a/unsupported/Eigen/src/Splines/SplineFitting.h b/unsupported/Eigen/src/Splines/SplineFitting.h index c761a9b3d..1a4b80a2f 100644 --- a/unsupported/Eigen/src/Splines/SplineFitting.h +++ b/unsupported/Eigen/src/Splines/SplineFitting.h @@ -181,7 +181,7 @@ namespace Eigen * \ingroup Splines_Module * * \param[in] pts The data points to which a spline should be fit. - * \param[out] chord_lengths The resulting chord lenggth vector. + * \param[out] chord_lengths The resulting chord length vector. * * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data **/ diff --git a/unsupported/README.txt b/unsupported/README.txt index 83479ff0b..70793bf13 100644 --- a/unsupported/README.txt +++ b/unsupported/README.txt @@ -20,7 +20,7 @@ However, it: - must rely on Eigen, - must be highly related to math, - should have some general purpose in the sense that it could - potentially become an offical Eigen module (or be merged into another one). + potentially become an official Eigen module (or be merged into another one). In doubt feel free to contact us. For instance, if your addons is very too specific but it shows an interesting way of using Eigen, then it could be a nice demo. diff --git a/unsupported/bench/bench_svd.cpp b/unsupported/bench/bench_svd.cpp index 01d8231ae..e7028a2b9 100644 --- a/unsupported/bench/bench_svd.cpp +++ b/unsupported/bench/bench_svd.cpp @@ -70,7 +70,7 @@ void bench_svd(const MatrixType& a = MatrixType()) std::cout<< std::endl; timerJacobi.reset(); timerBDC.reset(); - cout << " Computes rotaion matrix" < A; typedef std::numeric_limits B; - // workaround "unsed typedef" warning: + // workaround "unused typedef" warning: VERIFY(!bool(internal::is_same::value)); #if EIGEN_HAS_CXX11 diff --git a/unsupported/test/cxx11_tensor_inflation_sycl.cpp b/unsupported/test/cxx11_tensor_inflation_sycl.cpp index f2f87f7ed..cf3e29f4c 100644 --- a/unsupported/test/cxx11_tensor_inflation_sycl.cpp +++ b/unsupported/test/cxx11_tensor_inflation_sycl.cpp @@ -22,10 +22,10 @@ using Eigen::Tensor; -// Inflation Defenition for each dimention the inflated val would be +// Inflation Definition for each dimension the inflated val would be //((dim-1)*strid[dim] +1) -// for 1 dimnention vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to +// for 1 dimension vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to // tensor of size (2*3) +1 = 7 with the value of // (4, 0, 0, 4, 0, 0, 4). diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 167b75d25..7a751ff02 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -247,7 +247,7 @@ void test_cuda_trancendental() { } for (int i = 0; i < num_elem; ++i) { std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; - if(std::abs(input2(i)-1.f)<0.05f) // log lacks accurary nearby 1 + if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy nearby 1 VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f)); else VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu index fa1a46732..389c0a8c2 100644 --- a/unsupported/test/cxx11_tensor_random_cuda.cu +++ b/unsupported/test/cxx11_tensor_random_cuda.cu @@ -37,7 +37,7 @@ void test_cuda_random_uniform() assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); - // For now we just check thes code doesn't crash. + // For now we just check this code doesn't crash. // TODO: come up with a valid test of randomness } diff --git a/unsupported/test/forward_adolc.cpp b/unsupported/test/forward_adolc.cpp index 866db8e86..6d0ae738d 100644 --- a/unsupported/test/forward_adolc.cpp +++ b/unsupported/test/forward_adolc.cpp @@ -132,7 +132,7 @@ void test_forward_adolc() } { - // simple instanciation tests + // simple instantiation tests Matrix x; foo(x); Matrix A(4,4);; diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp index 4f6723d6d..7cf4a77c3 100644 --- a/unsupported/test/sparse_extra.cpp +++ b/unsupported/test/sparse_extra.cpp @@ -8,7 +8,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -// import basic and product tests for deprectaed DynamicSparseMatrix +// import basic and product tests for deprecated DynamicSparseMatrix #define EIGEN_NO_DEPRECATED_WARNING #include "sparse_basic.cpp" #include "sparse_product.cpp" -- cgit v1.2.3 From e5f9f4768f9118dcafa5d254cfce7696e36f3a16 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 7 Jun 2018 15:03:50 +0200 Subject: Avoid unnecessary C++11 dependency --- Eigen/src/Core/MathFunctions.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 05462c5e1..954863c39 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1282,17 +1282,17 @@ double exp(const double &x) { return ::exp(x); } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { - auto com = ::expf(x.real()); - auto res_real = com * ::cosf(x.imag()); - auto res_imag = com * ::sinf(x.imag()); + float com = ::expf(x.real()); + float res_real = com * ::cosf(x.imag()); + float res_imag = com * ::sinf(x.imag()); return std::complex(res_real, res_imag); } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { - auto com = ::exp(x.real()); - auto res_real = com * ::cos(x.imag()); - auto res_imag = com * ::sin(x.imag()); + double com = ::exp(x.real()); + double res_real = com * ::cos(x.imag()); + double res_imag = com * ::sin(x.imag()); return std::complex(res_real, res_imag); } #endif -- cgit v1.2.3 From 7fe29aceeb9f7a7ec8bd6f9fa81b88d50e4819b6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 7 Jun 2018 15:36:20 +0200 Subject: Fix MSVC warning C4290: C++ exception specification ignored except to indicate a function is not __declspec(nothrow) --- Eigen/src/Core/util/Macros.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 729fb3b1d..8927bd404 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1030,7 +1030,13 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway. +# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif #endif #endif // EIGEN_MACROS_H -- cgit v1.2.3 From af7c83b9a20b1903ae5524c5f1be9d4093026d86 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 7 Jun 2018 15:45:24 +0200 Subject: Fix warning --- Eigen/src/Core/SelfCwiseBinaryOp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 50099df82..7c89c2e23 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -17,7 +17,6 @@ namespace Eigen { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } -- cgit v1.2.3 From 7d7bb91537e679c8246107936b5fd376bba1f5b0 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Thu, 7 Jun 2018 20:30:09 +0200 Subject: Missing line during manual rebase of PR-374 --- Eigen/src/Core/ProductEvaluators.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 5d2737d01..0d5aec570 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -357,7 +357,7 @@ struct generic_product_impl_base { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template - static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } }; -- cgit v1.2.3 From 522d3ca54dcb0d79b2c69acf0e1624746d3f41ec Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 7 Jun 2018 13:02:07 -0700 Subject: Don't use std::equal_to inside cuda kernels since it's not supported. --- Eigen/src/Core/util/Meta.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6e5af35c0..0d0b8c43a 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -551,23 +551,27 @@ T div_ceil(const T &a, const T &b) // The aim of the following functions is to bypass -Wfloat-equal warnings // when we really want a strict equality comparison on floating points. -template EIGEN_STRONG_INLINE +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const X& x,const Y& y) { return x == y; } -template<> EIGEN_STRONG_INLINE +#if !defined(EIGEN_CUDA_ARCH) +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const float& x,const float& y) { return std::equal_to()(x,y); } -template<> EIGEN_STRONG_INLINE +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const double& x,const double& y) { return std::equal_to()(x,y); } +#endif -template EIGEN_STRONG_INLINE +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const X& x,const Y& y) { return x != y; } -template<> EIGEN_STRONG_INLINE +#if !defined(EIGEN_CUDA_ARCH) +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to()(x,y); } -template<> EIGEN_STRONG_INLINE +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to()(x,y); } +#endif } // end namespace numext -- cgit v1.2.3 From f05dea6b2326836e5e0243fbaffbece84b833d64 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 8 Jun 2018 10:14:57 +0200 Subject: bug #1550: prevent avoidable memory allocation in RealSchur --- Eigen/src/Eigenvalues/RealSchur.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h index b72799e5b..4c53344bb 100644 --- a/Eigen/src/Eigenvalues/RealSchur.h +++ b/Eigen/src/Eigenvalues/RealSchur.h @@ -284,13 +284,13 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa using std::abs; m_matT = matrixH; + m_workspaceVector.resize(m_matT.cols()); if(computeU) - m_matU = matrixQ; + matrixQ.evalTo(m_matU, m_workspaceVector); Index maxIters = m_maxIters; if (maxIters == -1) maxIters = m_maxIterationsPerRow * matrixH.rows(); - m_workspaceVector.resize(m_matT.cols()); Scalar* workspace = &m_workspaceVector.coeffRef(0); // The matrix m_matT is divided in three parts. -- cgit v1.2.3 From 89d65bb9d6ed8ebc9ce297a3fad90d78ba6cbbbb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 8 Jun 2018 16:50:17 +0200 Subject: bug #1531: expose NumDimensions for compatibility with Tensor --- Eigen/src/Core/DenseBase.h | 5 +++++ test/indexed_view.cpp | 4 ---- test/main.h | 2 ++ test/mapped_matrix.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 55 insertions(+), 5 deletions(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 53b427b17..8b994f35c 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -157,6 +157,11 @@ template class DenseBase * we are dealing with a column-vector (if there is only one column) or with * a row-vector (if there is only one row). */ + NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2, + /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors, + * and 2 for matrices. + */ + Flags = internal::traits::Flags, /**< This stores expression \ref flags flags which may or may not be inherited by new expressions * constructed from this one. See the \ref flags "list of flags". diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp index 2d46ffd6b..551dc55b0 100644 --- a/test/indexed_view.cpp +++ b/test/indexed_view.cpp @@ -397,10 +397,6 @@ void test_indexed_view() // } // static checks of some internals: - - #define STATIC_CHECK( COND ) \ - EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT ) - STATIC_CHECK(( internal::is_valid_index_type::value )); STATIC_CHECK(( internal::is_valid_index_type::value )); STATIC_CHECK(( internal::is_valid_index_type::value )); diff --git a/test/main.h b/test/main.h index a94e1ab41..9c8148de2 100644 --- a/test/main.h +++ b/test/main.h @@ -339,6 +339,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file, #define VERIFY_IS_UNITARY(a) VERIFY(test_isUnitary(a)) +#define STATIC_CHECK(COND) EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT ) + #define CALL_SUBTEST(FUNC) do { \ g_test_stack.push_back(EI_PP_MAKE_STRING(FUNC)); \ FUNC; \ diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp index 6a84c5897..b9f36d8d6 100644 --- a/test/mapped_matrix.cpp +++ b/test/mapped_matrix.cpp @@ -181,6 +181,49 @@ void map_not_aligned_on_scalar() internal::aligned_delete(array1, (size+1)*(size+1)+1); } +#if EIGEN_HAS_CXX11 +template