aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/AssignEvaluator.h41
-rw-r--r--Eigen/src/Core/ProductEvaluators.h4
2 files changed, 43 insertions, 2 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 83cec500f..6e6452099 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -699,6 +699,26 @@ protected:
DstXprType& m_dstExpr;
};
+// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
+// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
+// when computing the product.
+
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
+class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
+{
+protected:
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
+ public:
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::DstXprType DstXprType;
+ typedef typename find_best_packet<Scalar, 4>::type PacketType;
+
+ EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
+ : Base(dst, src, func, dstExpr)
+ {
+ }
+ };
+
/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/
@@ -837,6 +857,27 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
+
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+{
+ typedef evaluator<Dst> DstEvaluatorType;
+ typedef evaluator<Src> SrcEvaluatorType;
+ typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;
+
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+
+ SrcEvaluatorType srcEvaluator(src);
+ resize_if_allowed(dst, src, func);
+
+ DstEvaluatorType dstEvaluator(dst);
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+ dense_assignment_loop<Kernel>::run(kernel);
+}
+
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 0762d9e8b..246bca3e5 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -424,7 +424,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
{
- call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
+ call_restricted_packet_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
}
// Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above
@@ -433,7 +433,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
{
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
+ call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
}