diff options
author | Srinivas Vasudevan <srvasude@google.com> | 2019-09-04 23:50:52 -0400 |
---|---|---|
committer | Srinivas Vasudevan <srvasude@google.com> | 2019-09-04 23:50:52 -0400 |
commit | a9cf823db7eeede110c33121d0ed17d98eb167fa (patch) | |
tree | d8929204b06fb98fc1cc199eb13f481e7efb1b96 | |
parent | 99036a3615a57315564ab86f1d8754bc6d77c8f3 (diff) | |
parent | e6c183f8fd0c9c093eb30e08bd08e8e48a80264c (diff) |
Merged eigen/eigen
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 1 | ||||
-rw-r--r-- | Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 2 | ||||
-rw-r--r-- | Eigen/src/SparseCore/SparseView.h | 1 | ||||
-rw-r--r-- | Eigen/src/plugins/ArrayCwiseUnaryOps.h | 12 | ||||
-rw-r--r-- | doc/CoeffwiseMathFunctionsTable.dox | 12 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 8 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 10 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 34 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 4 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_executor.cpp | 19 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_thread_pool.cpp | 47 |
13 files changed, 96 insertions, 62 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 651e3f7b3..5ce984caf 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -276,12 +276,12 @@ pselect(const Packet& mask, const Packet& a, const Packet& b) { template<> EIGEN_DEVICE_FUNC inline float pselect<float>( const float& mask, const float& a, const float&b) { - return mask == 0 ? b : a; + return numext::equal_strict(mask,0.f) ? b : a; } template<> EIGEN_DEVICE_FUNC inline double pselect<double>( const double& mask, const double& a, const double& b) { - return mask == 0 ? b : a; + return numext::equal_strict(mask,0.) ? b : a; } /** \internal \returns a <= b as a bit mask */ diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 744d7c4e4..11c8dae02 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -95,8 +95,8 @@ template<> struct packet_traits<float> : default_packet_traits #if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, - HasLog1p = 1, - HasExpm1 = 1, + HasLog1p = 1, + HasExpm1 = 1, HasNdtri = 1, #endif HasExp = 1, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 5da8ff5f4..ddd2979af 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -112,6 +112,7 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, HasLog1p = 1, HasExpm1 = 1, + HasNdtri = 1, HasExp = 1, HasNdtri = 1, HasSqrt = 1, diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index ea7973790..df6c28d2b 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -49,6 +49,7 @@ template<typename UnaryOp, typename ArgType> class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator { + protected: typedef typename XprType::Scalar Scalar; typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base; public: @@ -99,6 +100,7 @@ template<typename ViewOp, typename ArgType> class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator { + protected: typedef typename XprType::Scalar Scalar; typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base; public: diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index 7c4aea743..92b3d1f7b 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -90,6 +90,7 @@ struct unary_evaluator<SparseView<ArgType>, IteratorBased> class InnerIterator : public EvalIterator { + protected: typedef typename XprType::Scalar Scalar; public: diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 4aef72d92..06ac7aad0 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -608,16 +608,18 @@ erfc() const return ErfcReturnType(derived()); } -/** \cpp11 \returns an expression of the coefficient-wise Complementary error +/** \returns an expression of the coefficient-wise inverse of the CDF of the Normal distribution function * function of *this. * * \specialfunctions_module + * + * In other words, considering `x = ndtri(y)`, it returns the argument, x, for which the area under the + * Gaussian probability density function (integrated from minus infinity to x) is equal to y. * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar - * type T to be supported. + * \note This function supports only float and double scalar types. To support other scalar types, + * the user has to provide implementations of ndtri(T) for any scalar type T to be supported. * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf() + * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a> */ EIGEN_DEVICE_FUNC inline const NdtriReturnType diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox index 080e056e1..8186a5272 100644 --- a/doc/CoeffwiseMathFunctionsTable.dox +++ b/doc/CoeffwiseMathFunctionsTable.dox @@ -553,6 +553,18 @@ This also means that, unless specified, if the function \c std::foo is available </td> <td></td> </tr> +<tr> + <td class="code"> + \anchor cwisetable_ndtri + a.\link ArrayBase::ndtri ndtri\endlink(); \n + \link Eigen::ndtri ndtri\endlink(a); + </td> + <td>Inverse of the CDF of the Normal distribution function</td> + <td> + built-in for float and double + </td> + <td></td> +</tr> <tr><td colspan="4"></td></tr> </table> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 099522e39..bcb0daf30 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -1071,12 +1071,12 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> { #ifdef EIGEN_USE_THREADS // Select the async device on which to evaluate the expression. - template <typename DeviceType> + template <typename DeviceType, typename DoneCallback> typename internal::enable_if< internal::is_same<DeviceType, ThreadPoolDevice>::value, - TensorAsyncDevice<Derived, DeviceType>>::type - device(const DeviceType& dev, std::function<void()> done) { - return TensorAsyncDevice<Derived, DeviceType>(dev, derived(), std::move(done)); + TensorAsyncDevice<Derived, DeviceType, DoneCallback>>::type + device(const DeviceType& dev, DoneCallback done) { + return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done)); } #endif // EIGEN_USE_THREADS diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 5122b3623..cc9c65702 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -73,21 +73,21 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice { * ThreadPoolDevice). * * Example: - * std::function<void()> done = []() {}; + * auto done = []() { ... expression evaluation done ... }; * C.device(EIGEN_THREAD_POOL, std::move(done)) = A + B; */ -template <typename ExpressionType, typename DeviceType> +template <typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice { public: TensorAsyncDevice(const DeviceType& device, ExpressionType& expression, - std::function<void()> done) + DoneCallback done) : m_device(device), m_expression(expression), m_done(std::move(done)) {} template <typename OtherDerived> EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; - typedef internal::TensorAsyncExecutor<const Assign, DeviceType> Executor; + typedef internal::TensorAsyncExecutor<const Assign, DeviceType, DoneCallback> Executor; // WARNING: After assignment 'm_done' callback will be in undefined state. Assign assign(m_expression, other); @@ -99,7 +99,7 @@ class TensorAsyncDevice { protected: const DeviceType& m_device; ExpressionType& m_expression; - std::function<void()> m_done; + DoneCallback m_done; }; #endif // EIGEN_USE_THREADS diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 10339e5e7..cf07656b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -101,8 +101,8 @@ class TensorExecutor { * Default async execution strategy is not implemented. Currently it's only * available for ThreadPoolDevice (see definition below). */ -template <typename Expression, typename Device, bool Vectorizable, - bool Tileable> +template <typename Expression, typename Device, typename DoneCallback, + bool Vectorizable, bool Tileable> class TensorAsyncExecutor {}; /** @@ -419,15 +419,17 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr } }; -template <typename Expression, bool Vectorizable, bool Tileable> -class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> { +template <typename Expression, typename DoneCallback, bool Vectorizable, + bool Tileable> +class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback, + Vectorizable, Tileable> { public: typedef typename Expression::Index StorageIndex; typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, const ThreadPoolDevice& device, - std::function<void()> done) { + DoneCallback done) { TensorAsyncExecutorContext* const ctx = new TensorAsyncExecutorContext(expr, device, std::move(done)); @@ -455,7 +457,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> struct TensorAsyncExecutorContext { TensorAsyncExecutorContext(const Expression& expr, const ThreadPoolDevice& thread_pool, - std::function<void()> done) + DoneCallback done) : evaluator(expr, thread_pool), on_done(std::move(done)) {} ~TensorAsyncExecutorContext() { @@ -466,12 +468,13 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> Evaluator evaluator; private: - std::function<void()> on_done; + DoneCallback on_done; }; }; -template <typename Expression, bool Vectorizable> -class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ true> { +template <typename Expression, typename DoneCallback, bool Vectorizable> +class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback, + Vectorizable, /*Tileable*/ true> { public: typedef typename traits<Expression>::Index StorageIndex; typedef typename traits<Expression>::Scalar Scalar; @@ -485,7 +488,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, const ThreadPoolDevice& device, - std::function<void()> done) { + DoneCallback done) { TensorAsyncExecutorContext* const ctx = new TensorAsyncExecutorContext(expr, device, std::move(done)); @@ -494,9 +497,10 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable if (total_size < cache_size && !ExpressionHasTensorBroadcastingOp<Expression>::value) { - internal::TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, - /*Tileable*/ false>::runAsync( - expr, device, [ctx]() { delete ctx; }); + auto delete_ctx = [ctx]() { delete ctx; }; + internal::TensorAsyncExecutor< + Expression, ThreadPoolDevice, decltype(delete_ctx), Vectorizable, + /*Tileable*/ false>::runAsync(expr, device, std::move(delete_ctx)); return; } @@ -532,7 +536,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable struct TensorAsyncExecutorContext { TensorAsyncExecutorContext(const Expression& expr, const ThreadPoolDevice& thread_pool, - std::function<void()> done) + DoneCallback done) : device(thread_pool), evaluator(expr, thread_pool), on_done(std::move(done)) {} @@ -548,7 +552,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable TilingContext tiling; private: - std::function<void()> on_done; + DoneCallback on_done; }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index e823bd932..772dbbe35 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -94,7 +94,7 @@ template<typename XprType, template <class> class MakePointer_ = MakePointer> cl template<typename XprType> class TensorForcedEvalOp; template<typename ExpressionType, typename DeviceType> class TensorDevice; -template<typename ExpressionType, typename DeviceType> class TensorAsyncDevice; +template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice; template<typename Derived, typename Device> struct TensorEvaluator; struct NoOpOutputKernel; @@ -168,7 +168,7 @@ template <typename Expression, typename Device, bool Tileable = IsTileable<Device, Expression>::value> class TensorExecutor; -template <typename Expression, typename Device, +template <typename Expression, typename Device, typename DoneCallback, bool Vectorizable = IsVectorizable<Device, Expression>::value, bool Tileable = IsTileable<Device, Expression>::value> class TensorAsyncExecutor; diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp index f4d0401da..aa4ab0b80 100644 --- a/unsupported/test/cxx11_tensor_executor.cpp +++ b/unsupported/test/cxx11_tensor_executor.cpp @@ -578,11 +578,15 @@ static void test_async_execute_unary_expr(Device d) src.setRandom(); const auto expr = src.square(); + Eigen::Barrier done(1); + auto on_done = [&done]() { done.Notify(); }; + using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; - using Executor = internal::TensorAsyncExecutor<const Assign, Device, + using DoneCallback = decltype(on_done); + using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback, Vectorizable, Tileable>; - Eigen::Barrier done(1); - Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + + Executor::runAsync(Assign(dst, expr), d, on_done); done.Wait(); for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { @@ -610,12 +614,15 @@ static void test_async_execute_binary_expr(Device d) const auto expr = lhs + rhs; + Eigen::Barrier done(1); + auto on_done = [&done]() { done.Notify(); }; + using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; - using Executor = internal::TensorAsyncExecutor<const Assign, Device, + using DoneCallback = decltype(on_done); + using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback, Vectorizable, Tileable>; - Eigen::Barrier done(1); - Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + Executor::runAsync(Assign(dst, expr), d, on_done); done.Wait(); for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 62973cd08..dae7b0335 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -683,34 +683,39 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool) CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>()); CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<ColMajor>()); CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<RowMajor>()); - CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>()); - CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>()); + + CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>()); + CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>()); // Test EvalShardedByInnerDimContext parallelization strategy. - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<ColMajor>()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<RowMajor>()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>()); - CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<ColMajor>()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<RowMajor>()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>()); - CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<ColMajor>()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<RowMajor>()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>()); + CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>()); + + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<ColMajor>()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<RowMajor>()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>()); + CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>()); // Exercise various cases that have been problematic in the past. - CALL_SUBTEST_5(test_contraction_corner_cases<ColMajor>()); - CALL_SUBTEST_5(test_contraction_corner_cases<RowMajor>()); + CALL_SUBTEST_7(test_contraction_corner_cases<ColMajor>()); + CALL_SUBTEST_7(test_contraction_corner_cases<RowMajor>()); - CALL_SUBTEST_6(test_full_contraction<ColMajor>()); - CALL_SUBTEST_6(test_full_contraction<RowMajor>()); + CALL_SUBTEST_8(test_full_contraction<ColMajor>()); + CALL_SUBTEST_8(test_full_contraction<RowMajor>()); - CALL_SUBTEST_7(test_multithreaded_reductions<ColMajor>()); - CALL_SUBTEST_7(test_multithreaded_reductions<RowMajor>()); + CALL_SUBTEST_9(test_multithreaded_reductions<ColMajor>()); + CALL_SUBTEST_9(test_multithreaded_reductions<RowMajor>()); - CALL_SUBTEST_7(test_memcpy()); - CALL_SUBTEST_7(test_multithread_random()); + CALL_SUBTEST_10(test_memcpy()); + CALL_SUBTEST_10(test_multithread_random()); TestAllocator test_allocator; - CALL_SUBTEST_7(test_multithread_shuffle<ColMajor>(NULL)); - CALL_SUBTEST_7(test_multithread_shuffle<RowMajor>(&test_allocator)); - CALL_SUBTEST_7(test_threadpool_allocate(&test_allocator)); + CALL_SUBTEST_11(test_multithread_shuffle<ColMajor>(NULL)); + CALL_SUBTEST_11(test_multithread_shuffle<RowMajor>(&test_allocator)); + CALL_SUBTEST_11(test_threadpool_allocate(&test_allocator)); + + // Force CMake to split this test. + // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11 } |