From ab773c7e914633ec4a3ee1f7cdea8b168d3bce1a Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 24 Apr 2020 17:29:25 -0700 Subject: Extend support for Packet16b: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ptranspose<*,4> to support matmul and add unit test for Matrix * Matrix * work around a bug in slicing of Tensor. * Add tensor tests This speeds up matmul for boolean matrices by about 10x name old time/op new time/op delta BM_MatMul/8 267ns ± 0% 479ns ± 0% +79.25% (p=0.008 n=5+5) BM_MatMul/32 6.42µs ± 0% 0.87µs ± 0% -86.50% (p=0.008 n=5+5) BM_MatMul/64 43.3µs ± 0% 5.9µs ± 0% -86.42% (p=0.008 n=5+5) BM_MatMul/128 315µs ± 0% 44µs ± 0% -85.98% (p=0.008 n=5+5) BM_MatMul/256 2.41ms ± 0% 0.34ms ± 0% -85.68% (p=0.008 n=5+5) BM_MatMul/512 18.8ms ± 0% 2.7ms ± 0% -85.53% (p=0.008 n=5+5) BM_MatMul/1k 149ms ± 0% 22ms ± 0% -85.40% (p=0.008 n=5+5) --- unsupported/test/cxx11_tensor_block_eval.cpp | 94 +++++++++++++++++++--------- 1 file changed, 64 insertions(+), 30 deletions(-) (limited to 'unsupported/test/cxx11_tensor_block_eval.cpp') diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp index 226c495aa..a7a49fa1f 100644 --- a/unsupported/test/cxx11_tensor_block_eval.cpp +++ b/unsupported/test/cxx11_tensor_block_eval.cpp @@ -233,7 +233,7 @@ static void test_eval_tensor_binary_expr_block() { rhs.setRandom(); VerifyBlockEvaluator( - lhs + rhs, [&dims]() { return RandomBlock(dims, 1, 10); }); + lhs * rhs, [&dims]() { return RandomBlock(dims, 1, 10); }); } template @@ -274,7 +274,7 @@ static void test_eval_tensor_broadcast() { // Check that desc.destination() memory is not shared between two broadcast // materializations. VerifyBlockEvaluator( - input.broadcast(bcast) + input.square().broadcast(bcast), + input.broadcast(bcast) * input.square().broadcast(bcast), [&bcasted_dims]() { return SkewedInnerBlock(bcasted_dims); }); } @@ -509,7 +509,7 @@ static void test_eval_tensor_reshape_with_bcast() { DSizes dims(dim, dim); VerifyBlockEvaluator( - lhs.reshape(reshapeLhs).broadcast(bcastLhs) + + lhs.reshape(reshapeLhs).broadcast(bcastLhs) * rhs.reshape(reshapeRhs).broadcast(bcastRhs), [dims]() { return SkewedInnerBlock(dims); }); } @@ -529,11 +529,11 @@ static void test_eval_tensor_forced_eval() { DSizes dims(dim, dim); VerifyBlockEvaluator( - (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), + (lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims), [dims]() { return SkewedInnerBlock(dims); }); VerifyBlockEvaluator( - (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), + (lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims), [dims]() { return RandomBlock(dims, 1, 50); }); } @@ -755,7 +755,39 @@ static void test_assign_to_tensor_shuffle() { #define CALL_SUBTEST_PART(PART) \ CALL_SUBTEST_##PART -#define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME) \ +#define CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(PART, NAME) \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())) + +#define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME) \ CALL_SUBTEST_PART(PART)((NAME())); \ CALL_SUBTEST_PART(PART)((NAME())); \ CALL_SUBTEST_PART(PART)((NAME())); \ @@ -767,36 +799,38 @@ static void test_assign_to_tensor_shuffle() { CALL_SUBTEST_PART(PART)((NAME())); \ CALL_SUBTEST_PART(PART)((NAME())) -#define CALL_SUBTESTS_LAYOUTS(PART, NAME) \ +#define CALL_SUBTESTS_LAYOUTS_TYPES(PART, NAME) \ CALL_SUBTEST_PART(PART)((NAME())); \ - CALL_SUBTEST_PART(PART)((NAME())) + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())); \ + CALL_SUBTEST_PART(PART)((NAME())) EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) { // clang-format off - CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_block); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_block); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_binary_expr_block); CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_unary_expr_block); - CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_binary_expr_block); CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_binary_with_unary_expr_block); - CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_broadcast); - CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_reshape); - CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_cast); - CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_select); - CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_padding); - CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_chipping); - CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_generator); - CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_reverse); - CALL_SUBTESTS_DIMS_LAYOUTS(5, test_eval_tensor_slice); - CALL_SUBTESTS_DIMS_LAYOUTS(5, test_eval_tensor_shuffle); - - CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_reshape_with_bcast); - CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_forced_eval); - CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_chipping_of_bcast); - - CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor); - CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor_reshape); - CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor_chipping); - CALL_SUBTESTS_DIMS_LAYOUTS(8, test_assign_to_tensor_slice); - CALL_SUBTESTS_DIMS_LAYOUTS(8, test_assign_to_tensor_shuffle); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_broadcast); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_reshape); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_cast); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_select); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_padding); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_chipping); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_generator); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_reverse); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_slice); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_shuffle); + + CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_reshape_with_bcast); + CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval); + CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_chipping_of_bcast); + + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_reshape); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_chipping); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_slice); + CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_shuffle); // Force CMake to split this test. // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8 -- cgit v1.2.3