From f0b36fb9a405400e82b73ea70097b8ae3cd1095a Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Fri, 30 Aug 2019 15:13:38 -0700
Subject: evalSubExprsIfNeededAsync + async TensorContractionThreadPool

---
NOTE(review): this patch text was recovered from a scrape that collapsed all
newlines and dropped everything written between '<' and '>' (template
parameter/argument lists, the author's e-mail address). Line breaks were
rebuilt from the hunk headers (46 + 87 + 7 added lines = 140 insertions).
Template arguments (DataLayout, float, tensor ranks, ColMajor/RowMajor) and
exact indentation are reconstructed -- confirm against the upstream commit
before applying.

 unsupported/test/cxx11_tensor_thread_pool.cpp | 140 ++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)

(limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp')

diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 53b50d1ed..62973cd08 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -330,6 +330,52 @@ static void test_multithread_contraction_with_output_kernel() {
   }
 }
 
+template<int DataLayout>
+void test_async_multithread_contraction_agrees_with_singlethread()
+{
+  int contract_size = internal::random<int>(100, 500);
+
+  Tensor<float, 3, DataLayout> left(internal::random<int>(10, 40),
+                                    contract_size,
+                                    internal::random<int>(10, 40));
+
+  Tensor<float, 4, DataLayout> right(
+      internal::random<int>(1, 20), internal::random<int>(1, 20), contract_size,
+      internal::random<int>(1, 20));
+
+  left.setRandom();
+  right.setRandom();
+
+  // add constants to shift values away from 0 for more precision
+  left += left.constant(1.5f);
+  right += right.constant(1.5f);
+
+  typedef Tensor<float, 1>::DimensionPair DimPair;
+  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
+
+  Eigen::ThreadPool tp(internal::random<int>(2, 11));
+  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(8, 32));
+
+  Tensor<float, 5, DataLayout> st_result;
+  st_result = left.contract(right, dims);
+
+  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
+
+  Eigen::Barrier barrier(1);
+  tp_result.device(thread_pool_device, [&barrier]() { barrier.Notify(); }) =
+      left.contract(right, dims);
+  barrier.Wait();
+
+  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
+  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
+    // if both of the values are very small, then do nothing (because the test
+    // will fail due to numerical precision issues when values are small)
+    if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
+      VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
+    }
+  }
+}
+
 // We are triggering 'evalShardedByInnerDim' optimization.
 template <int DataLayout>
 static void test_sharded_by_inner_dim_contraction()
@@ -410,6 +456,93 @@ static void test_sharded_by_inner_dim_contraction_with_output_kernel()
   }
 }
 
+// We are triggering 'evalShardedByInnerDim' optimization.
+template <int DataLayout>
+static void test_async_sharded_by_inner_dim_contraction()
+{
+  typedef Tensor<float, 1>::DimensionPair DimPair;
+
+  const int num_threads = internal::random<int>(4, 16);
+  ThreadPool threads(num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads);
+
+  Tensor<float, 2, DataLayout> t_left(2, 10000);
+  Tensor<float, 2, DataLayout> t_right(10000, 10);
+  Tensor<float, 2, DataLayout> t_result(2, 10);
+
+  t_left.setRandom();
+  t_right.setRandom();
+  // Put trash in t_result to verify contraction clears output memory.
+  t_result.setRandom();
+
+  // Add a little offset so that the results won't be close to zero.
+  t_left += t_left.constant(1.0f);
+  t_right += t_right.constant(1.0f);
+
+  typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+  MapXf m_left(t_left.data(), 2, 10000);
+  MapXf m_right(t_right.data(), 10000, 10);
+  Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(2, 10);
+
+  // this contraction should be equivalent to a single matrix multiplication
+  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
+
+  // compute results by separate methods
+  Eigen::Barrier barrier(1);
+  t_result.device(device, [&barrier]() { barrier.Notify(); }) =
+      t_left.contract(t_right, dims);
+  barrier.Wait();
+
+  m_result = m_left * m_right;
+
+  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
+    VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
+  }
+}
+
+// We are triggering 'evalShardedByInnerDim' optimization with output kernel.
+template <int DataLayout>
+static void test_async_sharded_by_inner_dim_contraction_with_output_kernel()
+{
+  typedef Tensor<float, 1>::DimensionPair DimPair;
+
+  const int num_threads = internal::random<int>(4, 16);
+  ThreadPool threads(num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads);
+
+  Tensor<float, 2, DataLayout> t_left(2, 10000);
+  Tensor<float, 2, DataLayout> t_right(10000, 10);
+  Tensor<float, 2, DataLayout> t_result(2, 10);
+
+  t_left.setRandom();
+  t_right.setRandom();
+  // Put trash in t_result to verify contraction clears output memory.
+  t_result.setRandom();
+
+  // Add a little offset so that the results won't be close to zero.
+  t_left += t_left.constant(1.0f);
+  t_right += t_right.constant(1.0f);
+
+  typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+  MapXf m_left(t_left.data(), 2, 10000);
+  MapXf m_right(t_right.data(), 10000, 10);
+  Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(2, 10);
+
+  // this contraction should be equivalent to a single matrix multiplication
+  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
+
+  // compute results by separate methods
+  Eigen::Barrier barrier(1);
+  t_result.device(device, [&barrier]() { barrier.Notify(); }) =
+      t_left.contract(t_right, dims, SqrtOutputKernel());
+  barrier.Wait();
+  m_result = m_left * m_right;
+
+  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
+    VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
+  }
+}
+
 template<int DataLayout>
 void test_full_contraction() {
   int contract_size1 = internal::random<int>(1, 500);
@@ -550,11 +683,18 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
   CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
   CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<ColMajor>());
   CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<RowMajor>());
+  CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
+  CALL_SUBTEST_3(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
 
+  // Test EvalShardedByInnerDimContext parallelization strategy.
   CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<ColMajor>());
   CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction<RowMajor>());
   CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
   CALL_SUBTEST_4(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
+  CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<ColMajor>());
+  CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction<RowMajor>());
+  CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
+  CALL_SUBTEST_4(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
 
   // Exercise various cases that have been problematic in the past.
   CALL_SUBTEST_5(test_contraction_corner_cases<ColMajor>());
-- 
cgit v1.2.3