diff options
author | Gael Guennebaud <g.gael@free.fr> | 2016-04-12 23:03:03 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2016-04-12 23:03:03 +0200 |
commit | b67c983291706bd5cc776b3202cacb6ca6508b76 (patch) | |
tree | 9375b4c17b23a8c58383d434328dd6f321ef30e7 /Eigen/src/Core/ProductEvaluators.h | |
parent | e3a184785c3ea106b8f74471a325ea0945127639 (diff) |
Enable the use of half-packet in coeff-based product.
For instance, Matrix4f*Vector4f is now vectorized again when using AVX.
Diffstat (limited to 'Eigen/src/Core/ProductEvaluators.h')
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 3ce86e8cd..443a58c27 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -410,8 +410,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, typedef Product<Lhs, Rhs, LazyProduct> XprType; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketScalar PacketScalar; - typedef typename XprType::PacketReturnType PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) @@ -437,16 +435,20 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, typedef evaluator<LhsNestedCleaned> LhsEtorType; typedef evaluator<RhsNestedCleaned> RhsEtorType; - + enum { RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, - MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, - - PacketSize = packet_traits<Scalar>::size, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + + typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType; + typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType; + enum { + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost @@ -459,19 +461,23 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, - LhsAlignment = LhsEtorType::Alignment, - RhsAlignment = RhsEtorType::Alignment, - LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size, + RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size, + + // + LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -491,10 +497,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) |