diff options
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 11 | ||||
-rw-r--r-- | test/evaluators.cpp | 25 |
2 files changed, 32 insertions, 4 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 93fb80f12..767b67f0b 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -24,7 +24,7 @@ namespace internal { // copy_using_evaluator_traits is based on assign_traits -template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> +template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1> struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; @@ -51,13 +51,15 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), + RestrictedInnerSize = InnerSize == -1 ? MaxPacketSize : InnerSize, + RestrictedLinearSize = Dst::SizeAtCompileTime == -1 ? MaxPacketSize : Dst::SizeAtCompileTime, OuterStride = int(outer_stride_at_compile_time<Dst>::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime }; // TODO distinguish between linear traversal and inner-traversals - typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType; - typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType; + typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType; + typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType; enum { LinearPacketSize = unpacket_traits<LinearPacketType>::size, @@ -711,7 +713,8 @@ protected: public: typedef typename Base::Scalar Scalar; typedef typename Base::DstXprType DstXprType; - typedef typename find_best_packet<Scalar, 4>::type PacketType; + typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index f4fdaf053..ec000f1eb 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -90,6 +90,12 @@ namespace Eigen { { call_assignment_no_alias(dst.expression(), src, func); } + + template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_restricted_packet_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) + { + call_restricted_packet_assignment_no_alias(dst.expression(), src, func); + } } } @@ -496,4 +502,23 @@ EIGEN_DECLARE_TEST(evaluators) VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); } + + { + // test restricted_packet_assignment with an unaligned destination + const size_t M = 2; + const size_t K = 2; + const size_t N = 5; + float *destMem = new float[(M*N) + 1]; + float *dest = (internal::UIntPtr(destMem)%EIGEN_MAX_ALIGN_BYTES) == 0 ? destMem+1 : destMem; + + const Matrix<float, Dynamic, Dynamic, RowMajor> a = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(M, K); + const Matrix<float, Dynamic, Dynamic, RowMajor> b = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(K, N); + + Map<Matrix<float, Dynamic, Dynamic, RowMajor> > z(dest, M, N);; + Product<Matrix<float, Dynamic, Dynamic, RowMajor>, Matrix<float, Dynamic, Dynamic, RowMajor>, LazyProduct> tmp(a,b); + internal::call_restricted_packet_assignment(z.noalias(), tmp.derived(), internal::assign_op<float, float>()); + + VERIFY_IS_APPROX(z, a*b); + delete[] destMem; + } } |