about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Srinivas Vasudevan <srvasude@google.com> 2019-09-04 23:50:52 -0400
committer: Srinivas Vasudevan <srvasude@google.com> 2019-09-04 23:50:52 -0400
commit: a9cf823db7eeede110c33121d0ed17d98eb167fa (patch)
tree: d8929204b06fb98fc1cc199eb13f481e7efb1b96
parent: 99036a3615a57315564ab86f1d8754bc6d77c8f3 (diff)
parent: e6c183f8fd0c9c093eb30e08bd08e8e48a80264c (diff)
Merged eigen/eigen
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 4
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 4
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 1
-rw-r--r-- Eigen/src/SparseCore/SparseCwiseUnaryOp.h 2
-rw-r--r-- Eigen/src/SparseCore/SparseView.h 1
-rw-r--r-- Eigen/src/plugins/ArrayCwiseUnaryOps.h 12
-rw-r--r-- doc/CoeffwiseMathFunctionsTable.dox 12
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h 8
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h 10
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h 34
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h 4
-rw-r--r-- unsupported/test/cxx11_tensor_executor.cpp 19
-rw-r--r-- unsupported/test/cxx11_tensor_thread_pool.cpp 47
13 files changed, 96 insertions, 62 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 651e3f7b3..5ce984caf 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -276,12 +276,12 @@ pselect(const Packet& mask, const Packet& a, const Packet& b) {
template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
const float& mask, const float& a, const float&b) {
- return mask == 0 ? b : a;
+ return numext::equal_strict(mask,0.f) ? b : a;
}
template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
const double& mask, const double& a, const double& b) {
- return mask == 0 ? b : a;
+ return numext::equal_strict(mask,0.) ? b : a;
}
/** \internal \returns a <= b as a bit mask */
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 744d7c4e4..11c8dae02 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -95,8 +95,8 @@ template<> struct packet_traits<float> : default_packet_traits
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
#ifdef EIGEN_VECTORIZE_AVX512DQ
HasLog = 1,
- HasLog1p = 1,
- HasExpm1 = 1,
+ HasLog1p = 1,
+ HasExpm1 = 1,
HasNdtri = 1,
#endif
HasExp = 1,
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 5da8ff5f4..ddd2979af 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -112,6 +112,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,
+ HasNdtri = 1,
HasExp = 1,
HasNdtri = 1,
HasSqrt = 1,
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index ea7973790..df6c28d2b 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -49,6 +49,7 @@ template<typename UnaryOp, typename ArgType>
class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator
: public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator
{
+ protected:
typedef typename XprType::Scalar Scalar;
typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base;
public:
@@ -99,6 +100,7 @@ template<typename ViewOp, typename ArgType>
class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator
: public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator
{
+ protected:
typedef typename XprType::Scalar Scalar;
typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base;
public:
diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h
index 7c4aea743..92b3d1f7b 100644
--- a/Eigen/src/SparseCore/SparseView.h
+++ b/Eigen/src/SparseCore/SparseView.h
@@ -90,6 +90,7 @@ struct unary_evaluator<SparseView<ArgType>, IteratorBased>
class InnerIterator : public EvalIterator
{
+ protected:
typedef typename XprType::Scalar Scalar;
public:
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index 4aef72d92..06ac7aad0 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -608,16 +608,18 @@ erfc() const
return ErfcReturnType(derived());
}
-/** \cpp11 \returns an expression of the coefficient-wise Complementary error
+/** \returns an expression of the coefficient-wise inverse of the CDF of the Normal distribution
* function of *this.
*
* \specialfunctions_module
+ *
+ * In other words, considering `x = ndtri(y)`, it returns the argument, x, for which the area under the
+ * Gaussian probability density function (integrated from minus infinity to x) is equal to y.
*
- * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
- * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar
- * type T to be supported.
+ * \note This function supports only float and double scalar types. To support other scalar types,
+ * the user has to provide implementations of ndtri(T) for any scalar type T to be supported.
*
- * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf()
+ * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>
*/
EIGEN_DEVICE_FUNC
inline const NdtriReturnType
diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox
index 080e056e1..8186a5272 100644
--- a/doc/CoeffwiseMathFunctionsTable.dox
+++ b/doc/CoeffwiseMathFunctionsTable.dox
@@ -553,6 +553,18 @@ This also means that, unless specified, if the function \c std::foo is available
</td>
<td></td>
</tr>
+<tr>
+ <td class="code">
+ \anchor cwisetable_ndtri
+ a.\link ArrayBase::ndtri ndtri\endlink(); \n
+ \link Eigen::ndtri ndtri\endlink(a);
+ </td>
+ <td>Inverse of the CDF of the Normal distribution function</td>
+ <td>
+ built-in for float and double
+ </td>
+ <td></td>
+</tr>
<tr><td colspan="4"></td></tr>
</table>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 099522e39..bcb0daf30 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -1071,12 +1071,12 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
#ifdef EIGEN_USE_THREADS
// Select the async device on which to evaluate the expression.
- template <typename DeviceType>
+ template <typename DeviceType, typename DoneCallback>
typename internal::enable_if<
internal::is_same<DeviceType, ThreadPoolDevice>::value,
- TensorAsyncDevice<Derived, DeviceType>>::type
- device(const DeviceType& dev, std::function<void()> done) {
- return TensorAsyncDevice<Derived, DeviceType>(dev, derived(), std::move(done));
+ TensorAsyncDevice<Derived, DeviceType, DoneCallback>>::type
+ device(const DeviceType& dev, DoneCallback done) {
+ return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done));
}
#endif // EIGEN_USE_THREADS
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
index 5122b3623..cc9c65702 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
@@ -73,21 +73,21 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
* ThreadPoolDevice).
*
* Example:
- * std::function<void()> done = []() {};
+ * auto done = []() { ... expression evaluation done ... };
* C.device(EIGEN_THREAD_POOL, std::move(done)) = A + B;
*/
-template <typename ExpressionType, typename DeviceType>
+template <typename ExpressionType, typename DeviceType, typename DoneCallback>
class TensorAsyncDevice {
public:
TensorAsyncDevice(const DeviceType& device, ExpressionType& expression,
- std::function<void()> done)
+ DoneCallback done)
: m_device(device), m_expression(expression), m_done(std::move(done)) {}
template <typename OtherDerived>
EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) {
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
- typedef internal::TensorAsyncExecutor<const Assign, DeviceType> Executor;
+ typedef internal::TensorAsyncExecutor<const Assign, DeviceType, DoneCallback> Executor;
// WARNING: After assignment the 'm_done' callback will be in an undefined state.
Assign assign(m_expression, other);
@@ -99,7 +99,7 @@ class TensorAsyncDevice {
protected:
const DeviceType& m_device;
ExpressionType& m_expression;
- std::function<void()> m_done;
+ DoneCallback m_done;
};
#endif // EIGEN_USE_THREADS
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 10339e5e7..cf07656b3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -101,8 +101,8 @@ class TensorExecutor {
* Default async execution strategy is not implemented. Currently it's only
* available for ThreadPoolDevice (see definition below).
*/
-template <typename Expression, typename Device, bool Vectorizable,
- bool Tileable>
+template <typename Expression, typename Device, typename DoneCallback,
+ bool Vectorizable, bool Tileable>
class TensorAsyncExecutor {};
/**
@@ -419,15 +419,17 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
}
};
-template <typename Expression, bool Vectorizable, bool Tileable>
-class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
+template <typename Expression, typename DoneCallback, bool Vectorizable,
+ bool Tileable>
+class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
+ Vectorizable, Tileable> {
public:
typedef typename Expression::Index StorageIndex;
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
static EIGEN_STRONG_INLINE void runAsync(const Expression& expr,
const ThreadPoolDevice& device,
- std::function<void()> done) {
+ DoneCallback done) {
TensorAsyncExecutorContext* const ctx =
new TensorAsyncExecutorContext(expr, device, std::move(done));
@@ -455,7 +457,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable>
struct TensorAsyncExecutorContext {
TensorAsyncExecutorContext(const Expression& expr,
const ThreadPoolDevice& thread_pool,
- std::function<void()> done)
+ DoneCallback done)
: evaluator(expr, thread_pool), on_done(std::move(done)) {}
~TensorAsyncExecutorContext() {
@@ -466,12 +468,13 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable>
Evaluator evaluator;
private:
- std::function<void()> on_done;
+ DoneCallback on_done;
};
};
-template <typename Expression, bool Vectorizable>
-class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ true> {
+template <typename Expression, typename DoneCallback, bool Vectorizable>
+class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
+ Vectorizable, /*Tileable*/ true> {
public:
typedef typename traits<Expression>::Index StorageIndex;
typedef typename traits<Expression>::Scalar Scalar;
@@ -485,7 +488,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable
static EIGEN_STRONG_INLINE void runAsync(const Expression& expr,
const ThreadPoolDevice& device,
- std::function<void()> done) {
+ DoneCallback done) {
TensorAsyncExecutorContext* const ctx =
new TensorAsyncExecutorContext(expr, device, std::move(done));
@@ -494,9 +497,10 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable
if (total_size < cache_size &&
!ExpressionHasTensorBroadcastingOp<Expression>::value) {
- internal::TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable,
- /*Tileable*/ false>::runAsync(
- expr, device, [ctx]() { delete ctx; });
+ auto delete_ctx = [ctx]() { delete ctx; };
+ internal::TensorAsyncExecutor<
+ Expression, ThreadPoolDevice, decltype(delete_ctx), Vectorizable,
+ /*Tileable*/ false>::runAsync(expr, device, std::move(delete_ctx));
return;
}
@@ -532,7 +536,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable
struct TensorAsyncExecutorContext {
TensorAsyncExecutorContext(const Expression& expr,
const ThreadPoolDevice& thread_pool,
- std::function<void()> done)
+ DoneCallback done)
: device(thread_pool),
evaluator(expr, thread_pool),
on_done(std::move(done)) {}
@@ -548,7 +552,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable
TilingContext tiling;
private:
- std::function<void()> on_done;
+ DoneCallback on_done;
};
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index e823bd932..772dbbe35 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -94,7 +94,7 @@ template<typename XprType, template <class> class MakePointer_ = MakePointer> cl
template<typename XprType> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
-template<typename ExpressionType, typename DeviceType> class TensorAsyncDevice;
+template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
struct NoOpOutputKernel;
@@ -168,7 +168,7 @@ template <typename Expression, typename Device,
bool Tileable = IsTileable<Device, Expression>::value>
class TensorExecutor;
-template <typename Expression, typename Device,
+template <typename Expression, typename Device, typename DoneCallback,
bool Vectorizable = IsVectorizable<Device, Expression>::value,
bool Tileable = IsTileable<Device, Expression>::value>
class TensorAsyncExecutor;
diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
index f4d0401da..aa4ab0b80 100644
--- a/unsupported/test/cxx11_tensor_executor.cpp
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -578,11 +578,15 @@ static void test_async_execute_unary_expr(Device d)
src.setRandom();
const auto expr = src.square();
+ Eigen::Barrier done(1);
+ auto on_done = [&done]() { done.Notify(); };
+
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
- using Executor = internal::TensorAsyncExecutor<const Assign, Device,
+ using DoneCallback = decltype(on_done);
+ using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
Vectorizable, Tileable>;
- Eigen::Barrier done(1);
- Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); });
+
+ Executor::runAsync(Assign(dst, expr), d, on_done);
done.Wait();
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
@@ -610,12 +614,15 @@ static void test_async_execute_binary_expr(Device d)
const auto expr = lhs + rhs;
+ Eigen::Barrier done(1);
+ auto on_done = [&done]() { done.Notify(); };
+
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
- using Executor = internal::TensorAsyncExecutor<const Assign, Device,
+ using DoneCallback = decltype(on_done);
+ using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
Vectorizable, Tileable>;
- Eigen::Barrier done(1);
- Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); });
+ Executor::runAsync(Assign(dst, expr), d, on_done);
done.Wait();
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 62973cd08..dae7b0335 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -683,34 +683,39 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<RowMajor>());
- CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
- CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
+
+ CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
+ CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
// Test EvalShardedByInnerDimContext parallelization strategy.
- CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<ColMajor>());
- CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<RowMajor>());
- CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
- CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
- CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<ColMajor>());
- CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<RowMajor>());
- CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
- CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
+ CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<ColMajor>());
+ CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<RowMajor>());
+ CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
+ CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
+
+ CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<ColMajor>());
+ CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<RowMajor>());
+ CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
+ CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
// Exercise various cases that have been problematic in the past.
- CALL_SUBTEST_5(test_contraction_corner_cases<ColMajor>());
- CALL_SUBTEST_5(test_contraction_corner_cases<RowMajor>());
+ CALL_SUBTEST_7(test_contraction_corner_cases<ColMajor>());
+ CALL_SUBTEST_7(test_contraction_corner_cases<RowMajor>());
- CALL_SUBTEST_6(test_full_contraction<ColMajor>());
- CALL_SUBTEST_6(test_full_contraction<RowMajor>());
+ CALL_SUBTEST_8(test_full_contraction<ColMajor>());
+ CALL_SUBTEST_8(test_full_contraction<RowMajor>());
- CALL_SUBTEST_7(test_multithreaded_reductions<ColMajor>());
- CALL_SUBTEST_7(test_multithreaded_reductions<RowMajor>());
+ CALL_SUBTEST_9(test_multithreaded_reductions<ColMajor>());
+ CALL_SUBTEST_9(test_multithreaded_reductions<RowMajor>());
- CALL_SUBTEST_7(test_memcpy());
- CALL_SUBTEST_7(test_multithread_random());
+ CALL_SUBTEST_10(test_memcpy());
+ CALL_SUBTEST_10(test_multithread_random());
TestAllocator test_allocator;
- CALL_SUBTEST_7(test_multithread_shuffle<ColMajor>(NULL));
- CALL_SUBTEST_7(test_multithread_shuffle<RowMajor>(&test_allocator));
- CALL_SUBTEST_7(test_threadpool_allocate(&test_allocator));
+ CALL_SUBTEST_11(test_multithread_shuffle<ColMajor>(NULL));
+ CALL_SUBTEST_11(test_multithread_shuffle<RowMajor>(&test_allocator));
+ CALL_SUBTEST_11(test_threadpool_allocate(&test_allocator));
+
+ // Force CMake to split this test.
+ // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11
}