From 861d18d5532546ddb0cd2bff8795eda080ce0c85 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 12 Jul 2008 22:59:34 +0000 Subject: * Optimization: added a specialization of Block for xpr with DirectAccessBit * some simplifications and fixes in cache friendly products --- Eigen/src/Core/CacheFriendlyProduct.h | 99 ++++++++++++++++------------------- 1 file changed, 46 insertions(+), 53 deletions(-) (limited to 'Eigen/src/Core/CacheFriendlyProduct.h') diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h index a710d44d4..06b3f5876 100644 --- a/Eigen/src/Core/CacheFriendlyProduct.h +++ b/Eigen/src/Core/CacheFriendlyProduct.h @@ -367,7 +367,7 @@ static void ei_cache_friendly_product( * TODO: since rhs gets evaluated only once, no need to evaluate it */ template -EIGEN_DONT_INLINE static void ei_cache_friendly_product( +EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector( int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, @@ -408,54 +408,34 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product( : alignmentStep==2 ? EvenAligned : FirstAligned; - // find how many column do we have to skip to be aligned with the result (if possible) + // find how many columns do we have to skip to be aligned with the result (if possible) int skipColumns=0; - for (; skipColumns0) + if (alignedSize>alignedStart) { switch(alignmentPattern) { @@ -475,10 +455,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product( _EIGEN_ACCUMULATE_PACKETS(,u,u,+PacketSize); if (peels>2) _EIGEN_ACCUMULATE_PACKETS(,u,u,+2*PacketSize); if (peels>3) _EIGEN_ACCUMULATE_PACKETS(,u,u,+3*PacketSize); - if (peels>4) _EIGEN_ACCUMULATE_PACKETS(,u,u,+4*PacketSize); - if (peels>5) _EIGEN_ACCUMULATE_PACKETS(,u,u,+5*PacketSize); - if (peels>6) _EIGEN_ACCUMULATE_PACKETS(,u,u,+6*PacketSize); - if (peels>7) _EIGEN_ACCUMULATE_PACKETS(,u,u,+7*PacketSize); } for (int j = peeledSize; j0) + for (int i=columnBound; i