From 66665e7e76d2ad5aa37775b3777e9a53c6d1c18c Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 30 Aug 2019 14:49:40 -0700 Subject: Asynchronous expression evaluation with TensorAsyncDevice --- unsupported/test/cxx11_tensor_executor.cpp | 107 +++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 12 deletions(-) (limited to 'unsupported/test/cxx11_tensor_executor.cpp') diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp index e9922a48d..f4d0401da 100644 --- a/unsupported/test/cxx11_tensor_executor.cpp +++ b/unsupported/test/cxx11_tensor_executor.cpp @@ -562,37 +562,112 @@ static void test_execute_reverse_rvalue(Device d) } } +template +static void test_async_execute_unary_expr(Device d) +{ + static constexpr int Options = 0 | Layout; + + // Pick a large enough tensor size to bypass small tensor block evaluation + // optimization. + auto dims = RandomDims(50 / NumDims, 100 / NumDims); + + Tensor src(dims); + Tensor dst(dims); + + src.setRandom(); + const auto expr = src.square(); + + using Assign = TensorAssignOp; + using Executor = internal::TensorAsyncExecutor; + Eigen::Barrier done(1); + Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + done.Wait(); + + for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { + T square = src.coeff(i) * src.coeff(i); + VERIFY_IS_EQUAL(square, dst.coeff(i)); + } +} + +template +static void test_async_execute_binary_expr(Device d) +{ + static constexpr int Options = 0 | Layout; + + // Pick a large enough tensor size to bypass small tensor block evaluation + // optimization. + auto dims = RandomDims(50 / NumDims, 100 / NumDims); + + Tensor lhs(dims); + Tensor rhs(dims); + Tensor dst(dims); + + lhs.setRandom(); + rhs.setRandom(); + + const auto expr = lhs + rhs; + + using Assign = TensorAssignOp; + using Executor = internal::TensorAsyncExecutor; + + Eigen::Barrier done(1); + Executor::runAsync(Assign(dst, expr), d, [&done]() { done.Notify(); }); + done.Wait(); + + for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { + T sum = lhs.coeff(i) + rhs.coeff(i); + VERIFY_IS_EQUAL(sum, dst.coeff(i)); + } +} + #ifdef EIGEN_DONT_VECTORIZE #define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL -#else +#else #define VECTORIZABLE(VAL) VAL #endif #define CALL_SUBTEST_PART(PART) \ CALL_SUBTEST_##PART -#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ +#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ CALL_SUBTEST_PART(PART)((NAME(default_device))); \ CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ CALL_SUBTEST_PART(PART)((NAME(default_device))); \ CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ CALL_SUBTEST_PART(PART)((NAME(tp_device))) +// NOTE: Currently only ThreadPoolDevice supports async expression evaluation. +#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))) + EIGEN_DECLARE_TEST(cxx11_tensor_executor) { Eigen::DefaultDevice default_device; + // Default device is unused in ASYNC tests. + EIGEN_UNUSED_VARIABLE(default_device); - const auto num_threads = internal::random(1, 24); + const auto num_threads = internal::random(20, 24); Eigen::ThreadPool tp(num_threads); Eigen::ThreadPoolDevice tp_device(&tp, num_threads); @@ -660,8 +735,16 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) { CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 4); CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 5); + CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 3); + CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 4); + CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 5); + + CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 3); + CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 4); + CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 5); + // Force CMake to split this test. - // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14 + // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16 } #undef CALL_SUBTEST_COMBINATIONS -- cgit v1.2.3