diff options
author | 2008-05-03 10:01:30 +0000 | |
---|---|---|
committer | 2008-05-03 10:01:30 +0000 | |
commit | 0545df21497a7e3dab190b42b7cde840c9439395 (patch) | |
tree | 76b6968938a6ece62830d4d31038f5beb0309333 /Eigen/src/Core | |
parent | a6655dd91aea66a7e617031e87ca7f34dce2a639 (diff) |
slightly improved the cache-friendly product to use mul-add only
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/Product.h | 26 |
1 files changed, 10 insertions, 16 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 04b6fb9c0..e7ee1dc6a 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -398,15 +398,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3)); for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) { - // FIXME the following could be implemented using only mul-add, check if this is still OK for SSE res.writePacketCoeff(k,i, - ei_padd( - res.packetCoeff(k,i), - ei_padd( - ei_pmadd(tmp0, m_rhs.packetCoeff(j+0,i), ei_pmul(tmp1, m_rhs.packetCoeff(j+1,i))), - ei_pmadd(tmp2, m_rhs.packetCoeff(j+2,i), ei_pmul(tmp3, m_rhs.packetCoeff(j+3,i))) - ) - ) + ei_pmadd(tmp0, m_rhs.packetCoeff(j+0,i), + ei_pmadd(tmp1, m_rhs.packetCoeff(j+1,i), + ei_pmadd(tmp2, m_rhs.packetCoeff(j+2,i), + ei_pmadd(tmp3, m_rhs.packetCoeff(j+3,i), + res.packetCoeff(k,i))))) ); } } @@ -436,14 +433,11 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) { res.writePacketCoeff(i,k, - ei_padd( - res.packetCoeff(i,k), - ei_padd( - ei_pmadd(tmp0, m_lhs.packetCoeff(i,j), ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), - ei_pmadd(tmp2, m_lhs.packetCoeff(i,j+2),ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))) - - ) - ) + ei_pmadd(tmp0, m_lhs.packetCoeff(i,j), + ei_pmadd(tmp1, m_lhs.packetCoeff(i,j+1), + ei_pmadd(tmp2, m_lhs.packetCoeff(i,j+2), + ei_pmadd(tmp3, m_lhs.packetCoeff(i,j+3), + res.packetCoeff(i,k))))) ); } } |