From 83c0a16baf5ecac6288cd9b74536a82de8985b31 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Tue, 31 Jul 2018 15:56:31 -0700
Subject: Add block evaluation support to TensorOps

---
 unsupported/test/cxx11_tensor_executor.cpp | 480 ++++++++++++++++++++++++++---
 1 file changed, 443 insertions(+), 37 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
index 274f901ce..448f47f1d 100644
--- a/unsupported/test/cxx11_tensor_executor.cpp
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -18,22 +18,30 @@ using Eigen::RowMajor;
 using Eigen::ColMajor;
 
 // A set of tests to verify that different TensorExecutor strategies yield the
-// same results for all the ops, supporting tiled execution.
+// same results for all the ops, supporting tiled evaluation.
+
+template <int NumDims>
+static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
+  array<Index, NumDims> dims;
+  for (int i = 0; i < NumDims; ++i) {
+    dims[i] = internal::random<int>(min_dim, max_dim);
+  }
+  return dims;
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_binary_expr(Device d)
+{
+  static constexpr int Options = 0 | Layout;
 
-template <typename Device, bool Vectorizable, bool Tileable, int Layout>
-static void test_execute_binary_expr(Device d) {
   // Pick a large enough tensor size to bypass small tensor block evaluation
   // optimization.
-  int d0 = internal::random<int>(100, 200);
-  int d1 = internal::random<int>(100, 200);
-  int d2 = internal::random<int>(100, 200);
+  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
 
-  static constexpr int Options = 0;
-  using IndexType = int;
-
-  Tensor<float, 3, Options, IndexType> lhs(d0, d1, d2);
-  Tensor<float, 3, Options, IndexType> rhs(d0, d1, d2);
-  Tensor<float, 3, Options, IndexType> dst(d0, d1, d2);
+  Tensor<T, NumDims, Options, Index> lhs(dims);
+  Tensor<T, NumDims, Options, Index> rhs(dims);
+  Tensor<T, NumDims, Options, Index> dst(dims);
 
   lhs.setRandom();
   rhs.setRandom();
@@ -46,33 +54,389 @@ static void test_execute_binary_expr(Device d) {
 
   Executor::run(Assign(dst, expr), d);
 
-  for (int i = 0; i < d0; ++i) {
-    for (int j = 0; j < d1; ++j) {
-      for (int k = 0; k < d2; ++k) {
-        float sum = lhs(i, j, k) + rhs(i, j, k);
-        VERIFY_IS_EQUAL(sum, dst(i, j, k));
-      }
-    }
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    T sum = lhs.coeff(i) + rhs.coeff(i);
+    VERIFY_IS_EQUAL(sum, dst.coeff(i));
+  }
+}
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_broadcasting(Device d)
+{
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(1, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  const auto broadcasts = RandomDims<NumDims>(1, 7);
+  const auto expr = src.broadcast(broadcasts);
+
+  // We assume that broadcasting on the default device is tested and correct,
+  // so we can rely on it to verify correctness of tensor executor and tiling.
+  Tensor<T, NumDims, Options, Index> golden;
+  golden = expr;
+
+  // Now do the broadcasting using the configured tensor executor.
+  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_chipping_rvalue(Device d) {
+  auto dims = RandomDims<NumDims>(1, 10);
+  Tensor<T, NumDims, Layout, Index> src(dims);
+  src.setRandom();
+
+#define TEST_CHIPPING(CHIP_DIM)                                           \
+  if (NumDims > (CHIP_DIM)) {                                             \
+    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
+    const auto expr = src.template chip<(CHIP_DIM)>(offset);              \
+                                                                          \
+    Tensor<T, NumDims - 1, Layout, Index> golden;                         \
+    golden = expr;                                                        \
+                                                                          \
+    Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions());       \
+                                                                          \
+    using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;   \
+    using Executor = internal::TensorExecutor<const Assign, Device,       \
+                                              Vectorizable, Tileable>;    \
+                                                                          \
+    Executor::run(Assign(dst, expr), d);                                  \
+                                                                          \
+    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {            \
+      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));                     \
+    }                                                                     \
+  }
+
+  TEST_CHIPPING(0)
+  TEST_CHIPPING(1)
+  TEST_CHIPPING(2)
+  TEST_CHIPPING(3)
+  TEST_CHIPPING(4)
+  TEST_CHIPPING(5)
+
+#undef TEST_CHIPPING
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_chipping_lvalue(Device d) {
+  auto dims = RandomDims<NumDims>(1, 10);
+
+#define TEST_CHIPPING(CHIP_DIM)                                             \
+  if (NumDims > (CHIP_DIM)) {                                               \
+    /* Generate random data that we'll assign to the chipped tensor dim. */ \
+    array<Index, NumDims - 1> src_dims;                                     \
+    for (int i = 0; i < NumDims - 1; ++i) {                                 \
+      int dim = i < (CHIP_DIM) ? i : i + 1;                                 \
+      src_dims[i] = dims[dim];                                              \
+    }                                                                       \
+                                                                            \
+    Tensor<T, NumDims - 1, Layout, Index> src(src_dims);                    \
+    src.setRandom();                                                        \
+                                                                            \
+    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1);   \
+                                                                            \
+    /* Generate random data to fill non-chipped dimensions. */              \
+    Tensor<T, NumDims, Layout, Index> random(dims);                         \
+    random.setRandom();                                                     \
+                                                                            \
+    Tensor<T, NumDims, Layout, Index> golden(dims);                         \
+    golden = random;                                                        \
+    golden.template chip<(CHIP_DIM)>(offset) = src;                         \
+                                                                            \
+    Tensor<T, NumDims, Layout, Index> dst(dims);                            \
+    dst = random;                                                           \
+    auto expr = dst.template chip<(CHIP_DIM)>(offset);                      \
+                                                                            \
+    using Assign = TensorAssignOp<decltype(expr), const decltype(src)>;     \
+    using Executor = internal::TensorExecutor<const Assign, Device,         \
+                                              Vectorizable, Tileable>;      \
+                                                                            \
+    Executor::run(Assign(expr, src), d);                                    \
+                                                                            \
+    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {              \
+      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));                       \
+    }                                                                       \
+  }
+
+  TEST_CHIPPING(0)
+  TEST_CHIPPING(1)
+  TEST_CHIPPING(2)
+  TEST_CHIPPING(3)
+  TEST_CHIPPING(4)
+  TEST_CHIPPING(5)
+
+#undef TEST_CHIPPING
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_shuffle_rvalue(Device d) {
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(1, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Create a random dimension re-ordering/shuffle.
+  std::vector<int> shuffle;
+  for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
+  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+
+  const auto expr = src.shuffle(shuffle);
+
+  // We assume that shuffling on the default device is tested and correct, so
+  // we can rely on it to verify correctness of tensor executor and tiling.
+  Tensor<T, NumDims, Options, Index> golden;
+  golden = expr;
+
+  // Now do the shuffling using the configured tensor executor.
+  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_shuffle_lvalue(Device d) {
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(5, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Create a random dimension re-ordering/shuffle.
+  std::vector<int> shuffle;
+  for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
+  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+
+  array<Index, NumDims> shuffled_dims;
+  for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];
+
+  // We assume that shuffling on the default device is tested and correct, so
+  // we can rely on it to verify correctness of tensor executor and tiling.
+  Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
+  golden.shuffle(shuffle) = src;
+
+  // Now do the shuffling using the configured tensor executor.
+  Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
+
+  auto expr = dst.shuffle(shuffle);
+
+  using Assign = TensorAssignOp<decltype(expr), const decltype(src)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(expr, src), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_reduction(Device d)
+{
+  static_assert(NumDims >= 2, "NumDims must be at least 2");
+
+  static constexpr int ReducedDims = NumDims - 2;
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(5, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Pick two random and unique reduction dimensions.
+  int reduction0 = internal::random<int>(0, NumDims - 1);
+  int reduction1 = internal::random<int>(0, NumDims - 1);
+  while (reduction0 == reduction1) {
+    reduction1 = internal::random<int>(0, NumDims - 1);
+  }
+
+  DSizes<Index, 2> reduction_axis;
+  reduction_axis[0] = reduction0;
+  reduction_axis[1] = reduction1;
+
+  Tensor<T, ReducedDims, Options, Index> golden = src.sum(reduction_axis);
+
+  // Now do the reduction using the configured tensor executor.
+  Tensor<T, ReducedDims, Options, Index> dst(golden.dimensions());
+
+  auto expr = src.sum(reduction_axis);
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_reshape(Device d)
+{
+  static_assert(NumDims >= 2, "NumDims must be at least 2");
+
+  static constexpr int ReshapedDims = NumDims - 1;
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(5, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Multiply the 0th and 1st dimensions together and then shuffle.
+  std::vector<int> shuffle;
+  for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
+  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+
+  DSizes<Index, ReshapedDims> reshaped_dims;
+  reshaped_dims[shuffle[0]] = dims[0] * dims[1];
+  for (int i = 2; i < NumDims; ++i) reshaped_dims[shuffle[i - 1]] = dims[i];
+
+  Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);
+
+  // Now reshape using the configured tensor executor.
+  Tensor<T, ReshapedDims, Options, Index> dst(golden.dimensions());
+
+  auto expr = src.reshape(reshaped_dims);
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_slice_rvalue(Device d)
+{
+  static_assert(NumDims >= 2, "NumDims must be at least 2");
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(5, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Pick a random slice of the src tensor.
+  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>());
+  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());
+
+  // Make sure that slice start + size do not overflow tensor dims.
+  for (int i = 0; i < NumDims; ++i) {
+    slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
+    slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
+  }
+
+  Tensor<T, NumDims, Options, Index> golden =
+      src.slice(slice_start, slice_size);
+
+  // Now take the slice using the configured tensor executor.
+  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
+
+  auto expr = src.slice(slice_start, slice_size);
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
   }
 }
 
-#define CALL_SUBTEST_COMBINATIONS(NAME)                                        \
-  CALL_SUBTEST((NAME<DefaultDevice, false, false, ColMajor>(default_device))); \
-  CALL_SUBTEST((NAME<DefaultDevice, false, true, ColMajor>(default_device)));  \
-  CALL_SUBTEST((NAME<DefaultDevice, true, false, ColMajor>(default_device)));  \
-  CALL_SUBTEST((NAME<DefaultDevice, true, true, ColMajor>(default_device)));   \
-  CALL_SUBTEST((NAME<DefaultDevice, false, false, RowMajor>(default_device))); \
-  CALL_SUBTEST((NAME<DefaultDevice, false, true, RowMajor>(default_device)));  \
-  CALL_SUBTEST((NAME<DefaultDevice, true, false, RowMajor>(default_device)));  \
-  CALL_SUBTEST((NAME<DefaultDevice, true, true, RowMajor>(default_device)));   \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, false, false, ColMajor>(tp_device)));   \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, false, true, ColMajor>(tp_device)));    \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, true, false, ColMajor>(tp_device)));    \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, true, true, ColMajor>(tp_device)));     \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, false, false, RowMajor>(tp_device)));   \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, false, true, RowMajor>(tp_device)));    \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, true, false, RowMajor>(tp_device)));    \
-  CALL_SUBTEST((NAME<ThreadPoolDevice, true, true, RowMajor>(tp_device)))
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_slice_lvalue(Device d)
+{
+  static_assert(NumDims >= 2, "NumDims must be at least 2");
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(5, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  // Pick a random slice of the src tensor.
+  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
+  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
+
+  // Make sure that slice start + size do not overflow tensor dims.
+  for (int i = 0; i < NumDims; ++i) {
+    slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
+    slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
+  }
+
+  Tensor<T, NumDims, Options, Index> slice(slice_size);
+  slice.setRandom();
+
+  // Assign a slice using the default executor.
+  Tensor<T, NumDims, Options, Index> golden = src;
+  golden.slice(slice_start, slice_size) = slice;
+
+  // And using the configured execution strategy.
+  Tensor<T, NumDims, Options, Index> dst = src;
+  auto expr = dst.slice(slice_start, slice_size);
+
+  using Assign = TensorAssignOp<decltype(expr), const decltype(slice)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(expr, slice), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
+#define CALL_SUBTEST_COMBINATIONS(NAME, T, NUM_DIMS)                                        \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, false, false, ColMajor>(default_device))); \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, false, true, ColMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, true, false, ColMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, true, true, ColMajor>(default_device)));   \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, false, false, RowMajor>(default_device))); \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, false, true, RowMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, true, false, RowMajor>(default_device)));  \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, DefaultDevice, true, true, RowMajor>(default_device)));   \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, ColMajor>(tp_device)));   \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, ColMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, ColMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, ColMajor>(tp_device)));     \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, RowMajor>(tp_device)));   \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, RowMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, RowMajor>(tp_device)));    \
+  CALL_SUBTEST((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, RowMajor>(tp_device)))
 
 EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   Eigen::DefaultDevice default_device;
@@ -81,7 +445,49 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   Eigen::ThreadPool tp(num_threads);
   Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
 
-  CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr);
+  CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_broadcasting, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_broadcasting, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_broadcasting, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_rvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_rvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_rvalue, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_lvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_lvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_chipping_lvalue, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_rvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_rvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_rvalue, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_lvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_lvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_shuffle_lvalue, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_reduction, float, 2);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reduction, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reduction, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reduction, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_reshape, float, 2);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reshape, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reshape, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_reshape, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_rvalue, float, 2);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_rvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_rvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_rvalue, float, 5);
+
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_lvalue, float, 2);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_lvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_lvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS(test_execute_slice_lvalue, float, 5);
 }
 
 #undef CALL_SUBTEST_COMBINATIONS
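
Editor's note, not part of the commit: every test in the patch follows the same golden-reference pattern, so it is worth spelling out once outside the diff. The sketch below is a minimal standalone program; main(), the 8x8x8 and 2x2x2 sizes, the names update/view, and the particular flag values are illustrative assumptions, while TensorAssignOp, internal::TensorExecutor<Expression, Device, Vectorizable, Tileable>, and the coeff()/TotalSize() comparison loop are taken directly from the patch.

// Editor's sketch of the golden-reference pattern used by the tests above.
// Assumed context: a standalone program; sizes and flag choices are
// illustrative, not taken from the patch.
#include <unsupported/Eigen/CXX11/Tensor>

using namespace Eigen;

int main() {
  DefaultDevice d;

  Tensor<float, 3> lhs(8, 8, 8);
  Tensor<float, 3> rhs(8, 8, 8);
  Tensor<float, 3> dst(8, 8, 8);
  lhs.setRandom();
  rhs.setRandom();

  const auto expr = lhs + rhs;

  // Golden result: evaluate through the default assignment path, which the
  // tests assume to be correct.
  Tensor<float, 3> golden = lhs + rhs;

  // The same expression through an explicitly configured executor, here with
  // vectorization and tiled (block) evaluation both enabled.
  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor = internal::TensorExecutor<const Assign, DefaultDevice,
                                            /*Vectorizable=*/true,
                                            /*Tileable=*/true>;
  Executor::run(Assign(dst, expr), d);

  // Both evaluation strategies must agree exactly.
  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    if (dst.coeff(i) != golden.coeff(i)) return 1;
  }

  // The *_lvalue tests drive the same machinery in the other direction: a
  // writable view expression (here a slice) becomes the left-hand side of
  // the TensorAssignOp. The slice bounds below are illustrative.
  Tensor<float, 3> update(2, 2, 2);
  update.setRandom();

  auto view = dst.slice(DSizes<Index, 3>(1, 1, 1), DSizes<Index, 3>(2, 2, 2));

  using AssignToView = TensorAssignOp<decltype(view), const decltype(update)>;
  using ViewExecutor =
      internal::TensorExecutor<const AssignToView, DefaultDevice,
                               /*Vectorizable=*/false, /*Tileable=*/true>;
  ViewExecutor::run(AssignToView(view, update), d);

  return 0;
}

Sweeping Vectorizable and Tileable, ColMajor versus RowMajor, and DefaultDevice versus ThreadPoolDevice over this one pattern is exactly what produces the sixteen instantiations behind CALL_SUBTEST_COMBINATIONS.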