From ab773c7e914633ec4a3ee1f7cdea8b168d3bce1a Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 24 Apr 2020 17:29:25 -0700 Subject: Extend support for Packet16b: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ptranspose<*,4> to support matmul and add unit test for Matrix<bool> * Matrix<bool> * work around a bug in slicing of Tensor<bool>. * Add tensor tests This speeds up matmul for boolean matrices by about 10x name old time/op new time/op delta BM_MatMul/8 267ns ± 0% 479ns ± 0% +79.25% (p=0.008 n=5+5) BM_MatMul/32 6.42µs ± 0% 0.87µs ± 0% -86.50% (p=0.008 n=5+5) BM_MatMul/64 43.3µs ± 0% 5.9µs ± 0% -86.42% (p=0.008 n=5+5) BM_MatMul/128 315µs ± 0% 44µs ± 0% -85.98% (p=0.008 n=5+5) BM_MatMul/256 2.41ms ± 0% 0.34ms ± 0% -85.68% (p=0.008 n=5+5) BM_MatMul/512 18.8ms ± 0% 2.7ms ± 0% -85.53% (p=0.008 n=5+5) BM_MatMul/1k 149ms ± 0% 22ms ± 0% -85.40% (p=0.008 n=5+5) --- unsupported/test/cxx11_tensor_contraction.cpp | 68 ++++++++++++++------------- 1 file changed, 36 insertions(+), 32 deletions(-) (limited to 'unsupported/test/cxx11_tensor_contraction.cpp') diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp index 2fd128121..3b5c6a13c 100644 --- a/unsupported/test/cxx11_tensor_contraction.cpp +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -562,36 +562,40 @@ static void test_large_contraction_with_output_kernel() { EIGEN_DECLARE_TEST(cxx11_tensor_contraction) { - CALL_SUBTEST(test_evals()); - CALL_SUBTEST(test_evals()); - CALL_SUBTEST(test_scalar()); - CALL_SUBTEST(test_scalar()); - CALL_SUBTEST(test_multidims()); - CALL_SUBTEST(test_multidims()); - CALL_SUBTEST(test_holes()); - CALL_SUBTEST(test_holes()); - CALL_SUBTEST(test_full_redux()); - CALL_SUBTEST(test_full_redux()); - CALL_SUBTEST(test_contraction_of_contraction()); - CALL_SUBTEST(test_contraction_of_contraction()); - CALL_SUBTEST(test_expr()); - CALL_SUBTEST(test_expr()); - 
CALL_SUBTEST(test_out_of_order_contraction()); - CALL_SUBTEST(test_out_of_order_contraction()); - CALL_SUBTEST(test_consistency()); - CALL_SUBTEST(test_consistency()); - CALL_SUBTEST(test_large_contraction()); - CALL_SUBTEST(test_large_contraction()); - CALL_SUBTEST(test_matrix_vector()); - CALL_SUBTEST(test_matrix_vector()); - CALL_SUBTEST(test_tensor_vector()); - CALL_SUBTEST(test_tensor_vector()); - CALL_SUBTEST(test_small_blocking_factors()); - CALL_SUBTEST(test_small_blocking_factors()); - CALL_SUBTEST(test_tensor_product()); - CALL_SUBTEST(test_tensor_product()); - CALL_SUBTEST(test_const_inputs()); - CALL_SUBTEST(test_const_inputs()); - CALL_SUBTEST(test_large_contraction_with_output_kernel()); - CALL_SUBTEST(test_large_contraction_with_output_kernel()); + CALL_SUBTEST_1(test_evals()); + CALL_SUBTEST_1(test_evals()); + CALL_SUBTEST_1(test_scalar()); + CALL_SUBTEST_1(test_scalar()); + CALL_SUBTEST_2(test_multidims()); + CALL_SUBTEST_2(test_multidims()); + CALL_SUBTEST_2(test_holes()); + CALL_SUBTEST_2(test_holes()); + CALL_SUBTEST_3(test_full_redux()); + CALL_SUBTEST_3(test_full_redux()); + CALL_SUBTEST_3(test_contraction_of_contraction()); + CALL_SUBTEST_3(test_contraction_of_contraction()); + CALL_SUBTEST_4(test_expr()); + CALL_SUBTEST_4(test_expr()); + CALL_SUBTEST_4(test_out_of_order_contraction()); + CALL_SUBTEST_4(test_out_of_order_contraction()); + CALL_SUBTEST_5(test_consistency()); + CALL_SUBTEST_5(test_consistency()); + CALL_SUBTEST_5(test_large_contraction()); + CALL_SUBTEST_5(test_large_contraction()); + CALL_SUBTEST_6(test_matrix_vector()); + CALL_SUBTEST_6(test_matrix_vector()); + CALL_SUBTEST_6(test_tensor_vector()); + CALL_SUBTEST_6(test_tensor_vector()); + CALL_SUBTEST_7(test_small_blocking_factors()); + CALL_SUBTEST_7(test_small_blocking_factors()); + CALL_SUBTEST_7(test_tensor_product()); + CALL_SUBTEST_7(test_tensor_product()); + CALL_SUBTEST_8(test_const_inputs()); + CALL_SUBTEST_8(test_const_inputs()); + 
CALL_SUBTEST_8(test_large_contraction_with_output_kernel()); + CALL_SUBTEST_8(test_large_contraction_with_output_kernel()); + + // Force CMake to split this test. + // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8 + } -- cgit v1.2.3