From 8d0441052e7fac530fad12016f53f5b234a68d47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 31 Mar 2014 10:42:19 +0200 Subject: Finally, prefetching seems to help getting more stable performance --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'Eigen/src') diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index a9e42c8aa..d9e659c9a 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -639,7 +639,7 @@ void gebp_kernel for(Index i=0; i for(Index i=peeled_mc; i { Index i = rows-1; const LhsScalar* blA = &blockA[i*strideA+offsetA]; + prefetch(&blA[0]); const RhsScalar* blB = &blockB[j2*strideB+offsetB*8]; EIGEN_ASM_COMMENT("begin_vectorized_multiplication_of_last_rows 8"); @@ -863,6 +865,7 @@ void gebp_kernel for(Index i=peeled_mc; i for(Index i=0; i for(Index i=peeled_mc; i traits.initAcc(C0); const LhsScalar* blA = &blockA[i*strideA+offsetA*mr]; + prefetch(&blA[0]); const RhsScalar* blB = &blockB[j2*strideB+offsetB]; for(Index k=0; k for(Index i=peeled_mc; i