about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- Eigen/src/Core/AssignEvaluator.h | 11
-rw-r--r-- test/evaluators.cpp | 25
2 files changed, 32 insertions, 4 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 93fb80f12..767b67f0b 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -24,7 +24,7 @@ namespace internal {
// copy_using_evaluator_traits is based on assign_traits
-template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
@@ -51,13 +51,15 @@ private:
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
+ RestrictedInnerSize = InnerSize == -1 ? MaxPacketSize : InnerSize,
+ RestrictedLinearSize = Dst::SizeAtCompileTime == -1 ? MaxPacketSize : Dst::SizeAtCompileTime,
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
};
// TODO distinguish between linear traversal and inner-traversals
- typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
- typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+ typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
+ typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
enum {
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
@@ -711,7 +713,8 @@ protected:
public:
typedef typename Base::Scalar Scalar;
typedef typename Base::DstXprType DstXprType;
- typedef typename find_best_packet<Scalar, 4>::type PacketType;
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
+ typedef typename AssignmentTraits::PacketType PacketType;
EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index f4fdaf053..ec000f1eb 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -90,6 +90,12 @@ namespace Eigen {
{
call_assignment_no_alias(dst.expression(), src, func);
}
+ // Overload for a NoAlias-wrapped destination: unwraps it with dst.expression() and forwards dst, src and func to call_restricted_packet_assignment_no_alias.
+ template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+ EIGEN_DEVICE_FUNC void call_restricted_packet_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+ {
+ call_restricted_packet_assignment_no_alias(dst.expression(), src, func);
+ }
}
}
@@ -496,4 +502,23 @@ EIGEN_DECLARE_TEST(evaluators)
VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1);
VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15);
}
+
+ {
+ // Test restricted_packet_assignment with a destination that is not aligned on EIGEN_MAX_ALIGN_BYTES.
+ const size_t M = 2;
+ const size_t K = 2;
+ const size_t N = 5;
+ float *destMem = new float[(M*N) + 1]; // one spare float so the start can be shifted by one element
+ // EIGEN_MAX_ALIGN_BYTES is 0 when alignment is disabled: clamp to 1 to avoid a modulo by zero.
+ const internal::UIntPtr alignment = EIGEN_MAX_ALIGN_BYTES > 0 ? EIGEN_MAX_ALIGN_BYTES : 1;
+ float *dest = (internal::UIntPtr(destMem)%alignment) == 0 ? destMem+1 : destMem;
+ const Matrix<float, Dynamic, Dynamic, RowMajor> a = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(M, K);
+ const Matrix<float, Dynamic, Dynamic, RowMajor> b = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(K, N);
+ // View dest as a row-major M x N matrix and assign the lazy product a*b into it via the restricted-packet call.
+ Map<Matrix<float, Dynamic, Dynamic, RowMajor> > z(dest, M, N);
+ Product<Matrix<float, Dynamic, Dynamic, RowMajor>, Matrix<float, Dynamic, Dynamic, RowMajor>, LazyProduct> tmp(a,b);
+ internal::call_restricted_packet_assignment(z.noalias(), tmp.derived(), internal::assign_op<float, float>());
+ VERIFY_IS_APPROX(z, a*b);
+ delete[] destMem;
+ }
}