From ab773c7e914633ec4a3ee1f7cdea8b168d3bce1a Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 24 Apr 2020 17:29:25 -0700 Subject: Extend support for Packet16b: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ptranspose<*,4> to support matmul and add unit test for Matrix<bool> * Matrix<bool> * work around a bug in slicing of Tensor<bool>. * Add tensor tests This speeds up matmul for boolean matrices by about 10x name old time/op new time/op delta BM_MatMul/8 267ns ± 0% 479ns ± 0% +79.25% (p=0.008 n=5+5) BM_MatMul/32 6.42µs ± 0% 0.87µs ± 0% -86.50% (p=0.008 n=5+5) BM_MatMul/64 43.3µs ± 0% 5.9µs ± 0% -86.42% (p=0.008 n=5+5) BM_MatMul/128 315µs ± 0% 44µs ± 0% -85.98% (p=0.008 n=5+5) BM_MatMul/256 2.41ms ± 0% 0.34ms ± 0% -85.68% (p=0.008 n=5+5) BM_MatMul/512 18.8ms ± 0% 2.7ms ± 0% -85.53% (p=0.008 n=5+5) BM_MatMul/1k 149ms ± 0% 22ms ± 0% -85.40% (p=0.008 n=5+5) --- unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'unsupported/Eigen/CXX11') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index d04dcae17..f107d1b19 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -456,7 +456,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi // slice offsets and sizes. IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess && + // FIXME: Temporary workaround for bug in slicing of bool tensors. 
+ !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value, PreferBlockAccess = true, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, @@ -525,7 +527,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { m_impl.evalSubExprsIfNeeded(NULL); if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization @@ -559,14 +560,14 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi } return true; } - + #ifdef EIGEN_USE_THREADS template <typename EvalSubExprsCallback> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( EvaluatorPointerType data, EvalSubExprsCallback done) { m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); } -#endif // EIGEN_USE_THREADS +#endif // EIGEN_USE_THREADS EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); -- cgit v1.2.3