From e7e64c327785ceffd9da018ee265d761991f9685 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 17 May 2016 09:24:35 -0700 Subject: Enable the use of the packet API to evaluate tensor broadcasts. This speeds things up quite a bit: Before: BM_broadcasting/10 500000 3690 27.10 MFlops/s BM_broadcasting/80 500000 4014 1594.24 MFlops/s BM_broadcasting/640 100000 14770 27731.35 MFlops/s BM_broadcasting/4K 5000 632711 39512.48 MFlops/s After: BM_broadcasting/10 500000 4287 23.33 MFlops/s BM_broadcasting/80 500000 4455 1436.41 MFlops/s BM_broadcasting/640 200000 10195 40173.01 MFlops/s BM_broadcasting/4K 5000 423746 58997.57 MFlops/s --- unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 9ea1c78eb..5d67f69f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -106,7 +106,7 @@ struct TensorEvaluator, Device> static const int PacketSize = internal::unpacket_traits::size; enum { - IsAligned = false, + IsAligned = true, PacketAccess = TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, RawAccess = false -- cgit v1.2.3