From 01fd4096d395e7b816459f571bf2328c8435cc37 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 10 Jul 2018 13:16:38 -0700 Subject: Fuse computations into the Tensor contractions using output kernel --- unsupported/test/cxx11_tensor_thread_pool.cpp | 56 +++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'unsupported/test/cxx11_tensor_thread_pool.cpp') diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 2ef665f30..ea9d8afdc 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -232,6 +232,60 @@ void test_multithread_contraction_agrees_with_singlethread() { } } +// Apply Sqrt to all output elements. +struct SqrtOutputKernel { + template + EIGEN_ALWAYS_INLINE void operator()( + const OutputKernel::OutputMapper& output_mapper, + const TensorContractionParams&, Index, Index, Index num_rows, + Index num_cols) const { + for (int i = 0; i < num_rows; ++i) { + for (int j = 0; j < num_cols; ++j) { + output_mapper(i, j) = std::sqrt(output_mapper(i, j)); + } + } + } +}; + +template +static void test_multithread_contraction_with_output_kernel() { + typedef Tensor::DimensionPair DimPair; + + const int num_threads = internal::random(2, 11); + ThreadPool threads(num_threads); + Eigen::ThreadPoolDevice device(&threads, num_threads); + + Tensor t_left(30, 50, 8, 31); + Tensor t_right(8, 31, 7, 20, 10); + Tensor t_result(30, 50, 7, 20, 10); + + t_left.setRandom(); + t_right.setRandom(); + // Put trash in mat4 to verify contraction clears output memory. + t_result.setRandom(); + + // Add a little offset so that the results won't be close to zero. + t_left += t_left.constant(1.0f); + t_right += t_right.constant(1.0f); + + typedef Map> MapXf; + MapXf m_left(t_left.data(), 1500, 248); + MapXf m_right(t_right.data(), 248, 1400); + Eigen::Matrix m_result(1500, 1400); + + // this contraction should be equivalent to a single matrix multiplication + Eigen::array dims({{DimPair(2, 0), DimPair(3, 1)}}); + + // compute results by separate methods + t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel()); + + m_result = m_left * m_right; + + for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY(&t_result.data()[i] != &m_result.data()[i]); + VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i])); + } +} template void test_full_contraction() { @@ -355,6 +409,8 @@ void test_cxx11_tensor_thread_pool() CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread()); CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread()); + CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel()); + CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel()); // Exercise various cases that have been problematic in the past. CALL_SUBTEST_4(test_contraction_corner_cases()); -- cgit v1.2.3