path: root/unsupported/Eigen/CXX11
author     Rasmus Munk Larsen <rmlarsen@google.com>   2020-04-24 17:29:25 -0700
committer  Rasmus Munk Larsen <rmlarsen@google.com>   2020-04-28 16:12:47 +0000
commit     ab773c7e914633ec4a3ee1f7cdea8b168d3bce1a (patch)
tree       5a82a3071e3c5de39e934ce144023c20f62b2de4 /unsupported/Eigen/CXX11
parent     b47c7779937c1984b7cd2f1d2f8df33d67c396f7 (diff)
Extend support for Packet16b:

* Add ptranspose<*,4> to support matmul, and add a unit test for Matrix<bool> * Matrix<bool>.
* Work around a bug in slicing of Tensor<bool>.
* Add tensor tests.

This speeds up matmul for boolean matrices by about 10x.

name                 old time/op  new time/op  delta
BM_MatMul<bool>/8    267ns ± 0%   479ns ± 0%   +79.25%  (p=0.008 n=5+5)
BM_MatMul<bool>/32   6.42µs ± 0%  0.87µs ± 0%  -86.50%  (p=0.008 n=5+5)
BM_MatMul<bool>/64   43.3µs ± 0%  5.9µs ± 0%   -86.42%  (p=0.008 n=5+5)
BM_MatMul<bool>/128  315µs ± 0%   44µs ± 0%    -85.98%  (p=0.008 n=5+5)
BM_MatMul<bool>/256  2.41ms ± 0%  0.34ms ± 0%  -85.68%  (p=0.008 n=5+5)
BM_MatMul<bool>/512  18.8ms ± 0%  2.7ms ± 0%   -85.53%  (p=0.008 n=5+5)
BM_MatMul<bool>/1k   149ms ± 0%   22ms ± 0%    -85.40%  (p=0.008 n=5+5)
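For context, a minimal sketch of the two operations this change targets: a Matrix<bool> product and a Tensor<bool> slice. The size n and the variable names are illustrative only, and the way the boolean inputs are built (comparison against a random float matrix) is an assumption of this sketch, not taken from Eigen's tests.

#include <Eigen/Dense>
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  using BoolMatrix = Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic>;
  const Eigen::Index n = 64;  // illustrative size

  // Boolean matrices built from random float matrices via a comparison.
  BoolMatrix a = (Eigen::MatrixXf::Random(n, n).array() > 0.0f).matrix();
  BoolMatrix b = (Eigen::MatrixXf::Random(n, n).array() > 0.0f).matrix();

  // Matrix<bool> * Matrix<bool>: the product path measured by BM_MatMul<bool>.
  BoolMatrix c = a * b;

  // Slicing a Tensor<bool>: the operation affected by the BlockAccess workaround below.
  Eigen::Tensor<bool, 2> t(n, n);
  t.setConstant(true);
  Eigen::array<Eigen::Index, 2> offsets = {1, 1};
  Eigen::array<Eigen::Index, 2> extents = {n - 2, n - 2};
  Eigen::Tensor<bool, 2> s = t.slice(offsets, extents);

  std::cout << c.count() << " " << s.dimension(0) << "x" << s.dimension(1) << "\n";
  return 0;
}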
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h  9
1 file changed, 5 insertions(+), 4 deletions(-)
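The first hunk below gates BlockAccess on whether the slice's scalar type (with any const qualifier stripped) is bool, so boolean tensor slices take the regular non-block evaluation path until the underlying slicing bug is fixed. As a standalone illustration of that trait check, here is a small sketch; the IsBoolScalar helper is invented for this example and is not part of Eigen.

#include <Eigen/Core>
#include <iostream>

// Hypothetical helper mirroring the check added in the patch: true only when
// the scalar type, after removing const, is exactly bool.
template <typename Scalar>
struct IsBoolScalar {
  static const bool value = Eigen::internal::is_same<
      typename Eigen::internal::remove_const<Scalar>::type, bool>::value;
};

int main() {
  std::cout << IsBoolScalar<bool>::value << "\n";        // prints 1
  std::cout << IsBoolScalar<const bool>::value << "\n";  // prints 1
  std::cout << IsBoolScalar<float>::value << "\n";       // prints 0
  return 0;
}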
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index d04dcae17..f107d1b19 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -456,7 +456,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// slice offsets and sizes.
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
+ BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
+ // FIXME: Temporary workaround for bug in slicing of bool tensors.
+ !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@@ -525,7 +527,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
@@ -559,14 +560,14 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
return true;
}
-
+
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType data, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
-#endif // EIGEN_USE_THREADS
+#endif // EIGEN_USE_THREADS
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();