aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-05-17 09:24:35 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-05-17 09:24:35 -0700
commite7e64c327785ceffd9da018ee265d761991f9685 (patch)
treedf61359ed916c4eb95acc0ab2f13821dde037cee /unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
parent5fa27574dd29fc5753a0b9d39b1fe5c15668e658 (diff)
Enable the use of the packet api to evaluate tensor broadcasts. This speed things up quite a bit:
Before" M_broadcasting/10 500000 3690 27.10 MFlops/s BM_broadcasting/80 500000 4014 1594.24 MFlops/s BM_broadcasting/640 100000 14770 27731.35 MFlops/s BM_broadcasting/4K 5000 632711 39512.48 MFlops/s After: BM_broadcasting/10 500000 4287 23.33 MFlops/s BM_broadcasting/80 500000 4455 1436.41 MFlops/s BM_broadcasting/640 200000 10195 40173.01 MFlops/s BM_broadcasting/4K 5000 423746 58997.57 MFlops/s
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h2
1 files changed, 1 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 9ea1c78eb..5d67f69f3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -106,7 +106,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
enum {
- IsAligned = false,
+ IsAligned = true,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false