diff options
author | Benoit Jacob <benoitjacob@google.com> | 2018-11-27 16:53:14 -0500 |
---|---|---|
committer | Benoit Jacob <benoitjacob@google.com> | 2018-11-27 16:53:14 -0500 |
commit | a4159dba080f5621f19f814440553ba734c8e712 (patch) | |
tree | ae54b8ca8fc46cb990d169fbb912972f7a3a04f7 /Eigen/src/Core/products/GeneralBlockPanelKernel.h | |
parent | b131a4db2439ea1ca4ba86cbc86aa962914915c5 (diff) |
Do not read buffers out of bounds -- load only the 4 bytes we know exist here. We could also have used vld1_lane_f32, but doing so here without the overhead of initializing the unused lane would have triggered use-of-uninitialized-value errors in tools such as ASan. Note that this code is sub-optimal both before and after this change: we should be reading either 2 or 4 float32 values per load instruction (2 for ARM in-order cores with an affinity for 8-byte loads; 4 for ARM out-of-order cores able to dual-issue 16-byte load instructions with arithmetic instructions). Both before and after this patch, we load only 4 bytes of useful data here (even though before this patch we were technically loading 8 bytes, only to use the first 4).
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 5619a4588..9ca865bd1 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -859,7 +859,7 @@ template<> struct gebp_traits <float, float, false, false,Architecture::NEON> : gebp_traits<float,float,false,false,Architecture::Generic> { - typedef float32x2_t RhsPacket; + typedef float RhsPacket; EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) { @@ -871,7 +871,7 @@ struct gebp_traits <float, float, false, false,Architecture::NEON> EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { - dest = vld1_f32(b); + dest = *b; } EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const @@ -881,7 +881,7 @@ struct gebp_traits <float, float, false, false,Architecture::NEON> EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/) const { - c = vfmaq_lane_f32(c, a, b, 0); + c = vfmaq_n_f32(c, a, b); } }; |