From 5fa27574dd29fc5753a0b9d39b1fe5c15668e658 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 17 May 2016 09:17:26 -0700 Subject: Allow vectorized padding on GPU. This helps speed things up a little Before: BM_padding/10 5000000 460 217.03 MFlops/s BM_padding/80 5000000 460 13899.40 MFlops/s BM_padding/640 5000000 461 888421.17 MFlops/s BM_padding/4K 5000000 460 54316322.55 MFlops/s After: BM_padding/10 5000000 454 220.20 MFlops/s BM_padding/80 5000000 455 14039.86 MFlops/s BM_padding/640 5000000 452 904968.83 MFlops/s BM_padding/4K 5000000 411 60750049.21 MFlops/s --- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 742339f9e..266eea0a0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -93,7 +93,7 @@ struct TensorEvaluator, Device static const int PacketSize = internal::unpacket_traits::size; enum { - IsAligned = false, + IsAligned = true, PacketAccess = TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = true, -- cgit v1.2.3