aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <benoitjacob@google.com>2015-02-23 16:55:17 -0500
committerGravatar Benoit Jacob <benoitjacob@google.com>2015-02-23 16:55:17 -0500
commit26275b250a271b27132e41486de5bb5b18e8365a (patch)
tree26d6691419faf098d62e4e94ec16a36846142769 /Eigen
parent488874781b7349bb91961de5c7b63af93124c504 (diff)
Fix my recent prefetch changes:
- the first prefetch is actually harmful on Haswell with FMA, but it is the most beneficial on ARM. - the second prefetch... I was very stupid and multiplied by sizeof(scalar) and offset of a scalar* pointer. The old offset was 64; pk = 8, so 64=pk*8. So this effectively restores the older offset. Actually, there were two prefetches here, one with offset 48 and one with offset 64. I could not confirm any benefit from this strange 48 offset on either the haswell or my ARM device.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h4
1 files changed, 2 insertions, 2 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index ccd906540..93bd6ab53 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -855,7 +855,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
internal::prefetch(blA+(3*K+16)*LhsProgress); \
- internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
+ if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
@@ -878,7 +878,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
} while(false)
- internal::prefetch(blB + 4 * pk * sizeof(RhsScalar)); /* Bug 953 */
+ internal::prefetch(blB + 8 * pk); /* Bug 953 */
EIGEN_GEBP_ONESTEP(0);
EIGEN_GEBP_ONESTEP(1);
EIGEN_GEBP_ONESTEP(2);