about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2008-05-03 10:01:30 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-05-03 10:01:30 +0000
commit: 0545df21497a7e3dab190b42b7cde840c9439395 (patch)
tree: 76b6968938a6ece62830d4d31038f5beb0309333 /Eigen/src
parent: a6655dd91aea66a7e617031e87ca7f34dce2a639 (diff)
slightly improved the cache friendly product to use mul-add only
Diffstat (limited to 'Eigen/src')
-rw-r--r-- Eigen/src/Core/Product.h 26
1 files changed, 10 insertions, 16 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 04b6fb9c0..e7ee1dc6a 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -398,15 +398,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
{
- // FIXME the following could be implemented using only mul-add, check if this is still OK for SSE
res.writePacketCoeff(k,i,
- ei_padd(
- res.packetCoeff(k,i),
- ei_padd(
- ei_pmadd(tmp0, m_rhs.packetCoeff(j+0,i), ei_pmul(tmp1, m_rhs.packetCoeff(j+1,i))),
- ei_pmadd(tmp2, m_rhs.packetCoeff(j+2,i), ei_pmul(tmp3, m_rhs.packetCoeff(j+3,i)))
- )
- )
+ ei_pmadd(tmp0, m_rhs.packetCoeff(j+0,i),
+ ei_pmadd(tmp1, m_rhs.packetCoeff(j+1,i),
+ ei_pmadd(tmp2, m_rhs.packetCoeff(j+2,i),
+ ei_pmadd(tmp3, m_rhs.packetCoeff(j+3,i),
+ res.packetCoeff(k,i)))))
);
}
}
@@ -436,14 +433,11 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.writePacketCoeff(i,k,
- ei_padd(
- res.packetCoeff(i,k),
- ei_padd(
- ei_pmadd(tmp0, m_lhs.packetCoeff(i,j), ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
- ei_pmadd(tmp2, m_lhs.packetCoeff(i,j+2),ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)))
-
- )
- )
+ ei_pmadd(tmp0, m_lhs.packetCoeff(i,j),
+ ei_pmadd(tmp1, m_lhs.packetCoeff(i,j+1),
+ ei_pmadd(tmp2, m_lhs.packetCoeff(i,j+2),
+ ei_pmadd(tmp3, m_lhs.packetCoeff(i,j+3),
+ res.packetCoeff(i,k)))))
);
}
}