From 47fefa235f73315bc57d685a7bc9cd8d3577349f Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 3 Sep 2019 17:20:56 -0700 Subject: Allow move-only done callback in TensorAsyncDevice --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 8 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 10 ++--- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 34 +++++++++------- .../CXX11/src/Tensor/TensorForwardDeclarations.h | 4 +- unsupported/test/cxx11_tensor_executor.cpp | 19 ++++++--- unsupported/test/cxx11_tensor_thread_pool.cpp | 47 ++++++++++++---------- 6 files changed, 69 insertions(+), 53 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 095c85dc4..f2aa37256 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -1065,12 +1065,12 @@ class TensorBase : public TensorBase { #ifdef EIGEN_USE_THREADS // Select the async device on which to evaluate the expression. - template + template typename internal::enable_if< internal::is_same::value, - TensorAsyncDevice>::type - device(const DeviceType& dev, std::function done) { - return TensorAsyncDevice(dev, derived(), std::move(done)); + TensorAsyncDevice>::type + device(const DeviceType& dev, DoneCallback done) { + return TensorAsyncDevice(dev, derived(), std::move(done)); } #endif // EIGEN_USE_THREADS diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 5122b3623..cc9c65702 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -73,21 +73,21 @@ template class TensorDevice { * ThreadPoolDevice). * * Example: - * std::function done = []() {}; + * auto done = []() { ... expression evaluation done ... }; * C.device(EIGEN_THREAD_POOL, std::move(done)) = A + B; */ -template +template class TensorAsyncDevice { public: TensorAsyncDevice(const DeviceType& device, ExpressionType& expression, - std::function done) + DoneCallback done) : m_device(device), m_expression(expression), m_done(std::move(done)) {} template EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp Assign; - typedef internal::TensorAsyncExecutor Executor; + typedef internal::TensorAsyncExecutor Executor; // WARNING: After assignment 'm_done' callback will be in undefined state. Assign assign(m_expression, other); @@ -99,7 +99,7 @@ class TensorAsyncDevice { protected: const DeviceType& m_device; ExpressionType& m_expression; - std::function m_done; + DoneCallback m_done; }; #endif // EIGEN_USE_THREADS diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 10339e5e7..cf07656b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -101,8 +101,8 @@ class TensorExecutor { * Default async execution strategy is not implemented. Currently it's only * available for ThreadPoolDevice (see definition below). */ -template +template class TensorAsyncExecutor {}; /** @@ -419,15 +419,17 @@ class TensorExecutor -class TensorAsyncExecutor { +template +class TensorAsyncExecutor { public: typedef typename Expression::Index StorageIndex; typedef TensorEvaluator Evaluator; static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, const ThreadPoolDevice& device, - std::function done) { + DoneCallback done) { TensorAsyncExecutorContext* const ctx = new TensorAsyncExecutorContext(expr, device, std::move(done)); @@ -455,7 +457,7 @@ class TensorAsyncExecutor struct TensorAsyncExecutorContext { TensorAsyncExecutorContext(const Expression& expr, const ThreadPoolDevice& thread_pool, - std::function done) + DoneCallback done) : evaluator(expr, thread_pool), on_done(std::move(done)) {} ~TensorAsyncExecutorContext() { @@ -466,12 +468,13 @@ class TensorAsyncExecutor Evaluator evaluator; private: - std::function on_done; + DoneCallback on_done; }; }; -template -class TensorAsyncExecutor { +template +class TensorAsyncExecutor { public: typedef typename traits::Index StorageIndex; typedef typename traits::Scalar Scalar; @@ -485,7 +488,7 @@ class TensorAsyncExecutor done) { + DoneCallback done) { TensorAsyncExecutorContext* const ctx = new TensorAsyncExecutorContext(expr, device, std::move(done)); @@ -494,9 +497,10 @@ class TensorAsyncExecutor::value) { - internal::TensorAsyncExecutor::runAsync( - expr, device, [ctx]() { delete ctx; }); + auto delete_ctx = [ctx]() { delete ctx; }; + internal::TensorAsyncExecutor< + Expression, ThreadPoolDevice, decltype(delete_ctx), Vectorizable, + /*Tileable*/ false>::runAsync(expr, device, std::move(delete_ctx)); return; } @@ -532,7 +536,7 @@ class TensorAsyncExecutor done) + DoneCallback done) : device(thread_pool), evaluator(expr, thread_pool), on_done(std::move(done)) {} @@ -548,7 +552,7 @@ class TensorAsyncExecutor on_done; + DoneCallback on_done; }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index e823bd932..772dbbe35 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -94,7 +94,7 @@ template class MakePointer_ = MakePointer> cl template class TensorForcedEvalOp; template class TensorDevice; -template class TensorAsyncDevice; +template class TensorAsyncDevice; template struct TensorEvaluator; struct NoOpOutputKernel; @@ -168,7 +168,7 @@ template ::value> class TensorExecutor; -template ::value, bool Tileable = IsTileable::value> class TensorAsyncExecutor; diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp index f4d0401da..aa4ab0b80 100644 --- a/unsupported/test/cxx11_tensor_executor.cpp +++ b/unsupported/test/cxx11_tensor_executor.cpp @@ -578,11 +578,15 @@ static void test_async_execute_unary_expr(Device d) src.setRandom(); const auto expr = src.square(); + Eigen::Barrier done(1); + auto on_done = [&done]() { done.Notify(); }; + using Assign = TensorAssignOp; - using Executor = internal::TensorAsyncExecutor; - Eigen::Barrier done(1); - Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + + Executor::runAsync(Assign(dst, expr), d, on_done); done.Wait(); for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { @@ -610,12 +614,15 @@ static void test_async_execute_binary_expr(Device d) const auto expr = lhs + rhs; + Eigen::Barrier done(1); + auto on_done = [&done]() { done.Notify(); }; + using Assign = TensorAssignOp; - using Executor = internal::TensorAsyncExecutor; - Eigen::Barrier done(1); - Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + Executor::runAsync(Assign(dst, expr), d, on_done); done.Wait(); for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 62973cd08..dae7b0335 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -683,34 +683,39 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool) CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread()); CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel()); CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel()); - CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread()); - CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread()); + + CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread()); + CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread()); // Test EvalShardedByInnerDimContext parallelization strategy. - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel()); + + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel()); // Exercise various cases that have been problematic in the past. - CALL_SUBTEST_5(test_contraction_corner_cases()); - CALL_SUBTEST_5(test_contraction_corner_cases()); + CALL_SUBTEST_7(test_contraction_corner_cases()); + CALL_SUBTEST_7(test_contraction_corner_cases()); - CALL_SUBTEST_6(test_full_contraction()); - CALL_SUBTEST_6(test_full_contraction()); + CALL_SUBTEST_8(test_full_contraction()); + CALL_SUBTEST_8(test_full_contraction()); - CALL_SUBTEST_7(test_multithreaded_reductions()); - CALL_SUBTEST_7(test_multithreaded_reductions()); + CALL_SUBTEST_9(test_multithreaded_reductions()); + CALL_SUBTEST_9(test_multithreaded_reductions()); - CALL_SUBTEST_7(test_memcpy()); - CALL_SUBTEST_7(test_multithread_random()); + CALL_SUBTEST_10(test_memcpy()); + CALL_SUBTEST_10(test_multithread_random()); TestAllocator test_allocator; - CALL_SUBTEST_7(test_multithread_shuffle(NULL)); - CALL_SUBTEST_7(test_multithread_shuffle(&test_allocator)); - CALL_SUBTEST_7(test_threadpool_allocate(&test_allocator)); + CALL_SUBTEST_11(test_multithread_shuffle(NULL)); + CALL_SUBTEST_11(test_multithread_shuffle(&test_allocator)); + CALL_SUBTEST_11(test_threadpool_allocate(&test_allocator)); + + // Force CMake to split this test. + // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11 } -- cgit v1.2.3 From f59bed7a133322955dac03f3654def706aff3ba6 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 3 Sep 2019 19:11:36 -0700 Subject: Change typedefs from private to protected to fix MSVC compilation --- Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 2 ++ Eigen/src/SparseCore/SparseView.h | 1 + 2 files changed, 3 insertions(+) diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index ea7973790..df6c28d2b 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -49,6 +49,7 @@ template class unary_evaluator, IteratorBased>::InnerIterator : public unary_evaluator, IteratorBased>::EvalIterator { + protected: typedef typename XprType::Scalar Scalar; typedef typename unary_evaluator, IteratorBased>::EvalIterator Base; public: @@ -99,6 +100,7 @@ template class unary_evaluator, IteratorBased>::InnerIterator : public unary_evaluator, IteratorBased>::EvalIterator { + protected: typedef typename XprType::Scalar Scalar; typedef typename unary_evaluator, IteratorBased>::EvalIterator Base; public: diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index 7c4aea743..92b3d1f7b 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -90,6 +90,7 @@ struct unary_evaluator, IteratorBased> class InnerIterator : public EvalIterator { + protected: typedef typename XprType::Scalar Scalar; public: -- cgit v1.2.3 From e38dd48a27b59b59a924b66a9486c3c2856acdf9 Mon Sep 17 00:00:00 2001 From: Srinivas Vasudevan Date: Mon, 12 Aug 2019 19:26:29 -0400 Subject: PR 681: Add ndtri function, the inverse of the normal distribution function. --- Eigen/src/Core/GenericPacketMath.h | 21 ++ Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/arch/AVX/PacketMath.h | 1 + Eigen/src/Core/arch/AVX512/PacketMath.h | 1 + .../Core/arch/Default/GenericPacketMathFunctions.h | 55 ++++ Eigen/src/Core/arch/GPU/PacketMath.h | 2 + Eigen/src/Core/arch/SSE/PacketMath.h | 1 + Eigen/src/Core/arch/SYCL/InteropHeaders.h | 1 + Eigen/src/Core/util/ForwardDeclarations.h | 1 + Eigen/src/plugins/ArrayCwiseUnaryOps.h | 28 +- test/packetmath.cpp | 6 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 6 + unsupported/Eigen/SpecialFunctions | 1 + .../SpecialFunctions/SpecialFunctionsFunctors.h | 25 ++ .../src/SpecialFunctions/SpecialFunctionsHalf.h | 3 + .../src/SpecialFunctions/SpecialFunctionsImpl.h | 311 +++++++++++++++++---- .../SpecialFunctions/SpecialFunctionsPacketMath.h | 7 + .../arch/GPU/GpuSpecialFunctions.h | 13 + unsupported/test/cxx11_tensor_gpu.cu | 64 +++++ unsupported/test/special_functions.cpp | 20 ++ 20 files changed, 498 insertions(+), 70 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 6ab38994f..651e3f7b3 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -83,6 +83,7 @@ struct default_packet_traits HasPolygamma = 0, HasErf = 0, HasErfc = 0, + HasNdtri = 0, HasI0e = 0, HasI1e = 0, HasIGamma = 0, @@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent); template EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { return pxor(a,a); } +template<> EIGEN_DEVICE_FUNC inline float pzero(const float& a) { + EIGEN_UNUSED_VARIABLE(a); + return 0.; +} + +template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { + EIGEN_UNUSED_VARIABLE(a); + return 0.; +} + /** \internal \returns bits of \a or \b according to the input bit mask \a mask */ template EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) { return por(pand(a,mask),pandnot(b,mask)); } +template<> EIGEN_DEVICE_FUNC inline float pselect( + const float& mask, const float& a, const float&b) { + return mask == 0 ? b : a; +} + +template<> EIGEN_DEVICE_FUNC inline double pselect( + const double& mask, const double& a, const double& b) { + return mask == 0 ? b : a; +} + /** \internal \returns a <= b as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 71377cee5..7f132bdd0 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -76,6 +76,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 020f6c276..e3363d006 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -72,6 +72,7 @@ template<> struct packet_traits : default_packet_traits HasLog1p = 1, HasExpm1 = 1, HasExp = 1, + HasNdtri = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index e37855693..11c8dae02 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -97,6 +97,7 @@ template<> struct packet_traits : default_packet_traits HasLog = 1, HasLog1p = 1, HasExpm1 = 1, + HasNdtri = 1, #endif HasExp = 1, HasSqrt = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 0fc673e12..13351d5ec 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -512,5 +512,60 @@ Packet pcos_float(const Packet& x) return psincos_float(x); } +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template +struct ppolevl { + static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits::type coeff[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); + } +}; + +template +struct ppolevl { + static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits::type coeff[]) { + EIGEN_UNUSED_VARIABLE(x); + return pset1(coeff[0]); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index d1df26e57..bdbaa5362 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -44,6 +44,7 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasNdtri = 1, HasI0e = 1, HasI1e = 1, HasIGamma = 1, @@ -78,6 +79,7 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasNdtri = 1, HasI0e = 1, HasI1e = 1, HasIGamma = 1, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index b59e2c602..61500c06d 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -112,6 +112,7 @@ template<> struct packet_traits : default_packet_traits HasLog = 1, HasLog1p = 1, HasExpm1 = 1, + HasNdtri = 1, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index ef66fc7de..d76030419 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits { HasPolygamma = 0, HasErf = 0, HasErfc = 0, + HasNdtri = 0, HasIGamma = 0, HasIGammac = 0, HasBetaInc = 0, diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 050d15e96..18889fcf4 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -214,6 +214,7 @@ template struct scalar_lgamma_op; template struct scalar_digamma_op; template struct scalar_erf_op; template struct scalar_erfc_op; +template struct scalar_ndtri_op; template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_zeta_op; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 2f99ee0b2..4aef72d92 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -536,14 +536,12 @@ typedef CwiseUnaryOp, const Derived> LgammaRe typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; +typedef CwiseUnaryOp, const Derived> NdtriReturnType; /** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|). * * \specialfunctions_module * - * Example: \include Cwise_lgamma.cpp - * Output: \verbinclude Cwise_lgamma.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar * type T to be supported. @@ -579,9 +577,6 @@ digamma() const * * \specialfunctions_module * - * Example: \include Cwise_erf.cpp - * Output: \verbinclude Cwise_erf.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar * type T to be supported. @@ -600,9 +595,6 @@ erf() const * * \specialfunctions_module * - * Example: \include Cwise_erfc.cpp - * Output: \verbinclude Cwise_erfc.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar * type T to be supported. @@ -615,3 +607,21 @@ erfc() const { return ErfcReturnType(derived()); } + +/** \cpp11 \returns an expression of the coefficient-wise Complementary error + * function of *this. + * + * \specialfunctions_module + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar + * type T to be supported. + * + * \sa Math functions, erf() + */ +EIGEN_DEVICE_FUNC +inline const NdtriReturnType +ndtri() const +{ + return NdtriReturnType(derived()); +} diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 67ff6dc5b..0e1e50e21 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -590,6 +590,12 @@ template void packetmath_real() h.store(data2, internal::perfc(h.load(data1))); VERIFY((numext::isnan)(data2[0])); } + { + for (int i=0; i(0,1); + } + CHECK_CWISE1_IF(internal::packet_traits::HasNdtri, numext::ndtri, internal::pndtri); + } #endif // EIGEN_HAS_C99_MATH for (int i=0; i return unaryExpr(internal::scalar_erfc_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + ndtri() const { + return unaryExpr(internal::scalar_ndtri_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> sigmoid() const { diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index 44fd99b43..a5abd407d 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -33,6 +33,7 @@ namespace Eigen { * - gamma_sample_der_alpha * - igammac * - digamma + * - ndtri * - polygamma * - zeta * - betainc diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index c6fac91bb..13a72a3ee 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -283,6 +283,31 @@ struct functor_traits > }; }; +/** \internal + * \brief Template functor to compute the Inverse of the normal distribution + * function of a scalar + * \sa class CwiseUnaryOp, Cwise::ndtri() + */ +template struct scalar_ndtri_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::ndtri; return ndtri(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); } +}; +template +struct functor_traits > +{ + enum { + // On average, We are evaluating rational functions with degree N=9 in the + // numerator and denominator. This results in 2*N additions and 2*N + // multiplications. + Cost = 18 * NumTraits::MulCost + 18 * NumTraits::AddCost, + PacketAccess = packet_traits::HasNdtri + }; +}; + /** \internal * \brief Template functor to compute the exponentially scaled modified Bessel * function of order zero diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h index fbdfd299e..538db2afa 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { return Eigen::half(Eigen::numext::erfc(static_cast(a))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) { + return Eigen::half(Eigen::numext::ndtri(static_cast(a))); +} template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igamma(static_cast(a), static_cast(x))); } diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 1464a9751..78050d0a1 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -38,63 +38,6 @@ namespace internal { namespace cephes { -/* polevl (modified for Eigen) - * - * Evaluate polynomial - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N+1]; - * - * y = polevl( x, coef); - * - * - * - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - * - * The Eigen implementation is templatized. For best speed, store - * coef as a const array (constexpr), e.g. - * - * const double coef[] = {1.0, 2.0, 3.0, ...}; - * - */ -template -struct polevl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) { - EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - return polevl::run(x, coef) * x + coef[N]; - } -}; - -template -struct polevl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) { - return coef[0]; - } -}; - /* chbevl (modified for Eigen) * * Evaluate Chebyshev series @@ -190,7 +133,7 @@ template <> struct lgamma_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(float x) { -#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgammaf_r(x, &dummy); #elif defined(SYCL_DEVICE_ONLY) @@ -205,7 +148,7 @@ template <> struct lgamma_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(double x) { -#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgamma_r(x, &dummy); #elif defined(SYCL_DEVICE_ONLY) @@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly { float z; if (s < 1.0e8f) { z = 1.0f / (s * s); - return z * cephes::polevl::run(z, A); + return z * internal::ppolevl::run(z, A); } else return 0.0f; } }; @@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly { double z; if (s < 1.0e17) { z = 1.0 / (s * s); - return z * cephes::polevl::run(z, A); + return z * internal::ppolevl::run(z, A); } else return 0.0; } @@ -494,6 +437,246 @@ struct erfc_impl { }; #endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of ndtri. * +****************************************************************************/ + +/* Inverse of Normal distribution function (modified for Eigen). + * + * + * SYNOPSIS: + * + * double x, y, ndtri(); + * + * x = ndtri( y ); + * + * + * + * DESCRIPTION: + * + * Returns the argument, x, for which the area under the + * Gaussian probability density function (integrated from + * minus infinity to x) is equal to y. + * + * + * For small arguments 0 < y < exp(-2), the program computes + * z = sqrt( -2.0 * log(y) ); then the approximation is + * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z). + * There are two rational functions P/Q, one for 0 < y < exp(-32) + * and the other for y up to exp(-2). For larger arguments, + * w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)). + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * DEC 0.125, 1 5500 9.5e-17 2.1e-17 + * DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17 + * IEEE 0.125, 1 20000 7.2e-16 1.3e-16 + * IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17 + * + * + * ERROR MESSAGES: + * + * message condition value returned + * ndtri domain x <= 0 -MAXNUM + * ndtri domain x >= 1 MAXNUM + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + + +// TODO: Add a cheaper approximation for float. + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign( + const T& should_flipsign, const T& x) { + const T sign_mask = pset1(-0.0); + T sign_bit = pand(should_flipsign, sign_mask); + return pxor(sign_bit, x); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign( + const double& should_flipsign, const double& x) { + return should_flipsign == 0 ? x : -x; +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign( + const float& should_flipsign, const float& x) { + return should_flipsign == 0 ? x : -x; +} + +// We split this computation in to two so that in the scalar path +// only one branch is evaluated (due to our template specialization of pselect +// being an if statement.) + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) { + const ScalarType p0[] = { + ScalarType(-5.99633501014107895267e1), + ScalarType(9.80010754185999661536e1), + ScalarType(-5.66762857469070293439e1), + ScalarType(1.39312609387279679503e1), + ScalarType(-1.23916583867381258016e0) + }; + const ScalarType q0[] = { + ScalarType(1.0), + ScalarType(1.95448858338141759834e0), + ScalarType(4.67627912898881538453e0), + ScalarType(8.63602421390890590575e1), + ScalarType(-2.25462687854119370527e2), + ScalarType(2.00260212380060660359e2), + ScalarType(-8.20372256168333339912e1), + ScalarType(1.59056225126211695515e1), + ScalarType(-1.18331621121330003142e0) + }; + const T sqrt2pi = pset1(ScalarType(2.50662827463100050242e0)); + const T half = pset1(ScalarType(0.5)); + T c, c2, ndtri_gt_exp_neg_two; + + c = psub(b, half); + c2 = pmul(c, c); + ndtri_gt_exp_neg_two = pmadd(c, pmul( + c2, pdiv( + internal::ppolevl::run(c2, p0), + internal::ppolevl::run(c2, q0))), c); + return pmul(ndtri_gt_exp_neg_two, sqrt2pi); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two( + const T& b, const T& should_flipsign) { + /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8 + * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14. + */ + const ScalarType p1[] = { + ScalarType(4.05544892305962419923e0), + ScalarType(3.15251094599893866154e1), + ScalarType(5.71628192246421288162e1), + ScalarType(4.40805073893200834700e1), + ScalarType(1.46849561928858024014e1), + ScalarType(2.18663306850790267539e0), + ScalarType(-1.40256079171354495875e-1), + ScalarType(-3.50424626827848203418e-2), + ScalarType(-8.57456785154685413611e-4) + }; + const ScalarType q1[] = { + ScalarType(1.0), + ScalarType(1.57799883256466749731e1), + ScalarType(4.53907635128879210584e1), + ScalarType(4.13172038254672030440e1), + ScalarType(1.50425385692907503408e1), + ScalarType(2.50464946208309415979e0), + ScalarType(-1.42182922854787788574e-1), + ScalarType(-3.80806407691578277194e-2), + ScalarType(-9.33259480895457427372e-4) + }; + /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64 + * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. + */ + const ScalarType p2[] = { + ScalarType(3.23774891776946035970e0), + ScalarType(6.91522889068984211695e0), + ScalarType(3.93881025292474443415e0), + ScalarType(1.33303460815807542389e0), + ScalarType(2.01485389549179081538e-1), + ScalarType(1.23716634817820021358e-2), + ScalarType(3.01581553508235416007e-4), + ScalarType(2.65806974686737550832e-6), + ScalarType(6.23974539184983293730e-9) + }; + const ScalarType q2[] = { + ScalarType(1.0), + ScalarType(6.02427039364742014255e0), + ScalarType(3.67983563856160859403e0), + ScalarType(1.37702099489081330271e0), + ScalarType(2.16236993594496635890e-1), + ScalarType(1.34204006088543189037e-2), + ScalarType(3.28014464682127739104e-4), + ScalarType(2.89247864745380683936e-6), + ScalarType(6.79019408009981274425e-9) + }; + const T eight = pset1(ScalarType(8.0)); + const T one = pset1(ScalarType(1)); + const T neg_two = pset1(ScalarType(-2)); + T x, x0, x1, z; + + x = psqrt(pmul(neg_two, plog(b))); + x0 = psub(x, pdiv(plog(x), x)); + z = one / x; + x1 = pmul( + z, pselect( + pcmp_lt(x, eight), + pdiv(internal::ppolevl::run(z, p1), + internal::ppolevl::run(z, q1)), + pdiv(internal::ppolevl::run(z, p2), + internal::ppolevl::run(z, q2)))); + return flipsign(should_flipsign, psub(x0, x1)); +} + +template +T generic_ndtri(const T& a) { + const T maxnum = pset1(NumTraits::infinity()); + const T neg_maxnum = pset1(-NumTraits::infinity()); + + const T zero = pset1(ScalarType(0)); + const T one = pset1(ScalarType(1)); + // exp(-2) + const T exp_neg_two = pset1(ScalarType(0.13533528323661269189)); + T b, ndtri, should_flipsign; + + should_flipsign = pcmp_le(a, psub(one, exp_neg_two)); + b = pselect(should_flipsign, a, psub(one, a)); + + ndtri = pselect( + pcmp_lt(exp_neg_two, b), + generic_ndtri_gt_exp_neg_two(b), + generic_ndtri_lt_exp_neg_two(b, should_flipsign)); + + return pselect( + pcmp_le(a, zero), neg_maxnum, + pselect(pcmp_le(one, a), maxnum, ndtri)); +} + +template +struct ndtri_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +# else + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_ndtri(x); + } +}; + +#endif // EIGEN_HAS_C99_MATH + + /************************************************************************************************************** * Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 * **************************************************************************************************************/ @@ -2120,6 +2303,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar) + ndtri(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) igamma(const Scalar& a, const Scalar& x) { diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 465f41d54..b6323c4db 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -38,6 +38,13 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } +/** \internal \returns the ndtri(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pndtri(const Packet& a) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_ndtri; return generic_ndtri(a); +} + /** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h index 40abcee3a..c831edc17 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h @@ -101,6 +101,19 @@ double2 perfc(const double2& a) return make_double2(erfc(a.x), erfc(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pndtri(const float4& a) +{ + using numext::ndtri; + return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pndtri(const double2& a) +{ + using numext::ndtri; + return make_double2(ndtri(a.x), ndtri(a.y)); +} template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma(const float4& a, const float4& x) diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index 94625e6a3..0a2fa8e61 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev) gpuFree(d_out); } #endif +template +void test_gpu_ndtri() +{ + Tensor in_x(8); + Tensor out(8); + Tensor expected_out(8); + out.setZero(); + + in_x(0) = Scalar(1); + in_x(1) = Scalar(0.); + in_x(2) = Scalar(0.5); + in_x(3) = Scalar(0.2); + in_x(4) = Scalar(0.8); + in_x(5) = Scalar(0.9); + in_x(6) = Scalar(0.1); + in_x(7) = Scalar(0.99); + in_x(8) = Scalar(0.01); + + expected_out(0) = std::numeric_limits::infinity(); + expected_out(1) = -std::numeric_limits::infinity(); + expected_out(2) = Scalar(0.0); + expected_out(3) = Scalar(-0.8416212335729142); + expected_out(4) = Scalar(0.8416212335729142);j + expected_out(5) = Scalar(1.2815515655446004); + expected_out(6) = Scalar(-1.2815515655446004); + expected_out(7) = Scalar(2.3263478740408408); + expected_out(8) = Scalar(-2.3263478740408408); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_out; + gpuMalloc((void**)(&d_in_x), bytes); + gpuMalloc((void**)(&d_out), bytes); + + gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice); + + Eigen::GpuStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in_x(d_in_x, 6); + Eigen::TensorMap > gpu_out(d_out, 6); + + gpu_out.device(gpu_device) = gpu_in_x.ndtri(); + + assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); + assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess); + + VERIFY_IS_EQUAL(out(0), expected_out(0)); + VERIFY((std::isnan)(out(3))); + + for (int i = 1; i < 6; ++i) { + if (i != 3) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + gpuFree(d_in_x); + gpuFree(d_out); +} template void test_gpu_betainc() @@ -1538,6 +1598,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu) #if !defined(EIGEN_USE_HIP) // disable these tests on HIP for now. + + CALL_SUBTEST_5(test_gpu_ndtri()); + CALL_SUBTEST_5(test_gpu_ndtri()); + CALL_SUBTEST_5(test_gpu_digamma()); CALL_SUBTEST_5(test_gpu_digamma()); diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 50dae6c93..140a5e4c1 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -133,6 +133,26 @@ template void array_special_functions() } #endif // EIGEN_HAS_C99_MATH + // Check the ndtri function against scipy.special.ndtri + { + ArrayType x(7), res(7), ref(7); + x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01; + ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); ); + + // ndtri(normal_cdf(x)) ~= x + CALL_SUBTEST( + ArrayType m1 = ArrayType::Random(32); + using std::sqrt; + + ArrayType cdf_val = (m1 / sqrt(2.)).erf(); + cdf_val = (cdf_val + 1.) / 2.; + verify_component_wise(cdf_val.ndtri(), m1);); + + } + // Check the zeta function against scipy.special.zeta { ArrayType x(7), q(7), res(7), ref(7); -- cgit v1.2.3 From 5702a579261b7227089a7e642fa9be0cb0fe1ad5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Sep 2019 22:57:04 +0200 Subject: Fix possible warning regarding strict equality comparisons --- Eigen/src/Core/GenericPacketMath.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 651e3f7b3..5ce984caf 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -276,12 +276,12 @@ pselect(const Packet& mask, const Packet& a, const Packet& b) { template<> EIGEN_DEVICE_FUNC inline float pselect( const float& mask, const float& a, const float&b) { - return mask == 0 ? b : a; + return numext::equal_strict(mask,0.f) ? b : a; } template<> EIGEN_DEVICE_FUNC inline double pselect( const double& mask, const double& a, const double& b) { - return mask == 0 ? b : a; + return numext::equal_strict(mask,0.) ? b : a; } /** \internal \returns a <= b as a bit mask */ -- cgit v1.2.3 From e6c183f8fd0c9c093eb30e08bd08e8e48a80264c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Sep 2019 23:00:21 +0200 Subject: Fix doc issues regarding ndtri --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 12 +++++++----- doc/CoeffwiseMathFunctionsTable.dox | 12 ++++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 4aef72d92..06ac7aad0 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -608,16 +608,18 @@ erfc() const return ErfcReturnType(derived()); } -/** \cpp11 \returns an expression of the coefficient-wise Complementary error +/** \returns an expression of the coefficient-wise inverse of the CDF of the Normal distribution function * function of *this. * * \specialfunctions_module + * + * In other words, considering `x = ndtri(y)`, it returns the argument, x, for which the area under the + * Gaussian probability density function (integrated from minus infinity to x) is equal to y. * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar - * type T to be supported. + * \note This function supports only float and double scalar types. To support other scalar types, + * the user has to provide implementations of ndtri(T) for any scalar type T to be supported. * - * \sa Math functions, erf() + * \sa Math functions */ EIGEN_DEVICE_FUNC inline const NdtriReturnType diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox index 080e056e1..8186a5272 100644 --- a/doc/CoeffwiseMathFunctionsTable.dox +++ b/doc/CoeffwiseMathFunctionsTable.dox @@ -553,6 +553,18 @@ This also means that, unless specified, if the function \c std::foo is available + + + \anchor cwisetable_ndtri + a.\link ArrayBase::ndtri ndtri\endlink(); \n + \link Eigen::ndtri ndtri\endlink(a); + + Inverse of the CDF of the Normal distribution function + + built-in for float and double + + + -- cgit v1.2.3