Add tiled evaluation support to TensorExecutor

author: Eugene Zhulenev <ezhulenev@google.com> 2018-07-25 13:51:10 -0700
committer: Eugene Zhulenev <ezhulenev@google.com> 2018-07-25 13:51:10 -0700
commit: 6913221c43c6ad41b1fbfc0d263d2764abd11ad2 (patch)
tree: e5dbd8f9c73087d37b1e812bc679d1dec2d3bfcd /unsupported/test
parent: d55efa6f0f9ab9ec758c6b40204be476c01b7528 (diff)
5 files changed, 85 insertions, 3 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index fa19b2159..239a80926 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -213,6 +213,7 @@ if(EIGEN_TEST_CXX11)
   ei_add_test(cxx11_tensor_striding)
   ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
   ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+  ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
   ei_add_test(cxx11_tensor_ref)
   ei_add_test(cxx11_tensor_random)
   ei_add_test(cxx11_tensor_generator)
diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 15f2392a3..416b686e4 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -901,7 +901,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   CALL_SUBTEST(NAME<ColMajor>(ARG)); \
   CALL_SUBTEST(NAME<RowMajor>(ARG))
 
-EIGEN_DECLARE_TEST(cxx11_tensor_assign) {
+EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   CALL_SUBTEST_LAYOUTS(test_block_mapper_sanity);
   CALL_SUBTEST_LAYOUTS(test_block_mapper_maps_every_element);
   CALL_SUBTEST_LAYOUTS(test_slice_block_mapper_maps_every_element);
diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_gpu.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_gpu.cu
index aa28457b1..f2a2a6cfa 100644
--- a/unsupported/test/cxx11_tensor_complex_cwise_ops_gpu.cu
+++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_gpu.cu
@@ -93,7 +93,7 @@ void test_cuda_complex_cwise_ops() {
 }
 
 
-void test_cxx11_tensor_complex_cwise_ops()
+EIGEN_DECLARE_TEST(test_cxx11_tensor_complex_cwise_ops)
 {
   CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
   CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
diff --git a/unsupported/test/cxx11_tensor_complex_gpu.cu b/unsupported/test/cxx11_tensor_complex_gpu.cu
index 7cf06aa7a..f8b8ae704 100644
--- a/unsupported/test/cxx11_tensor_complex_gpu.cu
+++ b/unsupported/test/cxx11_tensor_complex_gpu.cu
@@ -177,7 +177,7 @@ static void test_cuda_product_reductions() {
 }
 
 
-void test_cxx11_tensor_complex()
+EIGEN_DECLARE_TEST(test_cxx11_tensor_complex)
 {
   CALL_SUBTEST(test_cuda_nullary());
   CALL_SUBTEST(test_cuda_sum_reductions());
diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
new file mode 100644
index 000000000..5ae45ac5b
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -0,0 +1,81 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Index;
+using Eigen::Tensor;
+using Eigen::RowMajor;
+using Eigen::ColMajor;
+
+// A set of tests to verify that different TensorExecutor strategies yield the
+// same results for all the ops that support tiled execution.
+
+// Runs `dst = lhs + rhs` through the TensorExecutor selected by the template
+// arguments, on `Layout`-ordered tensors, and checks every coefficient.
+template <typename Device, bool Vectorizable, bool Tileable, int Layout>
+static void test_execute_binary_expr(Device d) {
+  // Pick a large enough tensor size to bypass small tensor block evaluation
+  // optimization.
+  Tensor<float, 3, Layout> lhs(840, 390, 37);
+  Tensor<float, 3, Layout> rhs(840, 390, 37);
+  Tensor<float, 3, Layout> dst(840, 390, 37);
+
+  lhs.setRandom();
+  rhs.setRandom();
+
+  const auto expr = lhs + rhs;
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+  Executor::run(Assign(dst, expr), d);
+
+  for (int i = 0; i < 840; ++i) {
+    for (int j = 0; j < 390; ++j) {
+      for (int k = 0; k < 37; ++k) {
+        float sum = lhs(i, j, k) + rhs(i, j, k);
+        VERIFY_IS_EQUAL(sum, dst(i, j, k));
+      }
+    }
+  }
+}
+// Calls NAME with the caller's default_device / tp_device for each flag combo.
+#define CALL_SUBTEST_COMBINATIONS(NAME)                                        \
+  CALL_SUBTEST((NAME<DefaultDevice, false, false, ColMajor>(default_device))); \
+  CALL_SUBTEST((NAME<DefaultDevice, false, true, ColMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<DefaultDevice, true, false, ColMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<DefaultDevice, true, true, ColMajor>(default_device)));   \
+  CALL_SUBTEST((NAME<DefaultDevice, false, false, RowMajor>(default_device))); \
+  CALL_SUBTEST((NAME<DefaultDevice, false, true, RowMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<DefaultDevice, true, false, RowMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<DefaultDevice, true, true, RowMajor>(default_device)));   \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, false, false, ColMajor>(tp_device)));   \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, false, true, ColMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, true, false, ColMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, true, true, ColMajor>(tp_device)));     \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, false, false, RowMajor>(tp_device)));   \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, false, true, RowMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, true, false, RowMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<ThreadPoolDevice, true, true, RowMajor>(tp_device)))
+
+EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
+  // Thread pool backing the ThreadPoolDevice; its size is drawn at random.
+  const int num_threads = internal::random<int>(1, 24);
+  Eigen::ThreadPool tp(num_threads);
+  Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
+  Eigen::DefaultDevice default_device;
+  // The macro refers to `default_device` and `tp_device` by name.
+  CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr);
+}
+
+#undef CALL_SUBTEST_COMBINATIONS
author	Eugene Zhulenev <ezhulenev@google.com>	2018-07-25 13:51:10 -0700
committer	Eugene Zhulenev <ezhulenev@google.com>	2018-07-25 13:51:10 -0700
commit	6913221c43c6ad41b1fbfc0d263d2764abd11ad2 (patch)
tree	e5dbd8f9c73087d37b1e812bc679d1dec2d3bfcd /unsupported/test
parent	d55efa6f0f9ab9ec758c6b40204be476c01b7528 (diff)