author     Gael Guennebaud <g.gael@free.fr>   2013-11-06 18:17:59 +0100
committer  Gael Guennebaud <g.gael@free.fr>   2013-11-06 18:17:59 +0100
commit     8edc9647345445c2b3e8b9ff1c70c625f43755c6 (patch)
tree       c21a84728ab6f4e67fc931073e8be58e3defbfdf
parent     a37bdfc9552374edb022cefa824b8dac56f9b6d6 (diff)
bug #99: refactor the assignment and compound-assignment mechanisms through "assignment functors" and "assignment kernels".
The former are very low level and generic. The latter abstract the former for dense expressions. This refactoring makes it possible to get rid of the very ugly SwapWrapper and SelfCwiseBinaryOp classes. In the future, it will also allow simplifying all these evaluation loops and perhaps reusing them for reductions. It will also make it possible to specialize operations like expr1 += expr2 outside Eigen, and thus for any kind of expression (dense, sparse, tensor, etc.).
-rw-r--r--   Eigen/Core                                       1
-rw-r--r--   Eigen/src/Core/AssignEvaluator.h               463
-rw-r--r--   Eigen/src/Core/CoreEvaluators.h                 12
-rw-r--r--   Eigen/src/Core/functors/AssignmentFunctors.h   167
-rw-r--r--   test/evaluator_common.h                          0
5 files changed, 478 insertions, 165 deletions
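
To make the design described in the commit message concrete, here is a minimal standalone C++ sketch of the two-level mechanism. The names below (assign_op, add_assign_op, dense_kernel, dense_assignment_loop) are simplified stand-ins chosen for illustration only, not the actual internals added by this patch, which additionally handle packets, alignment, traversal orders, and unrolling.

#include <cstddef>
#include <iostream>

// Assignment functors: very low level and generic. They only know how to
// combine one destination coefficient with one source coefficient.
struct assign_op     { void operator()(double& a, double b) const { a  = b; } };
struct add_assign_op { void operator()(double& a, double b) const { a += b; } };

// A "kernel" abstracts the functor for a given destination/source pair;
// the evaluation loop only talks to the kernel, never to the functor directly.
template<typename Functor>
struct dense_kernel {
  double* dst;
  const double* src;
  Functor func;
  void assignCoeff(std::size_t i) const { func(dst[i], src[i]); }
};

// A single generic loop then serves operator=, +=, -=, *=, /=, swap, ...
// simply by changing the functor carried by the kernel.
template<typename Kernel>
void dense_assignment_loop(const Kernel& kernel, std::size_t size) {
  for (std::size_t i = 0; i < size; ++i)
    kernel.assignCoeff(i);
}

int main() {
  double a[4]       = {1, 2, 3, 4};
  const double b[4] = {10, 20, 30, 40};

  dense_kernel<add_assign_op> k{a, b, add_assign_op()};
  dense_assignment_loop(k, 4);                 // performs a += b coefficient-wise

  for (double x : a) std::cout << x << ' ';    // prints: 11 22 33 44
  std::cout << '\n';
}

This is the essence of what generic_dense_assignment_kernel and dense_assignment_loop do in the patch below, and it is why SwapWrapper and SelfCwiseBinaryOp become unnecessary: swapping and compound assignment are just different functors fed to the same loops.
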
diff --git a/Eigen/Core b/Eigen/Core
index bf2d3a908..722a49030 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -383,6 +383,7 @@ using std::ptrdiff_t;
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_ENABLE_EVALUATORS
+#include "src/Core/functors/AssignmentFunctors.h"
#include "src/Core/Product.h"
#include "src/Core/CoreEvaluators.h"
#include "src/Core/AssignEvaluator.h"
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5f2fb9d38..adea7ef13 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -94,7 +94,7 @@ public:
enum {
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
- int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
@@ -139,7 +139,7 @@ public:
*** Default traversal ***
************************/
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -150,69 +150,74 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
};
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel
+ )
{
- dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+ kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { }
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel,
int outer)
{
- dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
+ kernel.assignCoeffByOuterInner(outer, Index, dstEvaluator, srcEvaluator);
copy_using_evaluator_DefaultTraversal_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
- ::run(dstEvaluator, srcEvaluator, outer);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop>
+ ::run(dstEvaluator, srcEvaluator, kernel, outer);
}
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&, int) { }
};
/***********************
*** Linear traversal ***
***********************/
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ SrcEvaluatorType &srcEvaluator,
+ const Kernel& kernel
+ )
{
- dstEvaluator.copyCoeff(Index, srcEvaluator);
+ kernel.assignCoeff(Index, dstEvaluator, srcEvaluator);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { }
};
/**************************
*** Inner vectorization ***
**************************/
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -225,63 +230,66 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
};
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel
+ )
{
- dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
+ kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner, dstEvaluator, srcEvaluator);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
copy_using_evaluator_innervec_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, NextIndex, Stop>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { }
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel,
int outer)
{
- dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
+ kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index, dstEvaluator, srcEvaluator);
typedef typename DstEvaluatorType::XprType DstXprType;
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
copy_using_evaluator_innervec_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
- ::run(dstEvaluator, srcEvaluator, outer);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, NextIndex, Stop>
+ ::run(dstEvaluator, srcEvaluator, kernel, outer);
}
};
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel &, int) { }
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
-// copy_using_evaluator_impl is based on assign_impl
+// dense_assignment_loop is based on assign_impl
-template<typename DstXprType, typename SrcXprType,
+template<typename DstXprType, typename SrcXprType, typename Kernel,
int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal,
int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
-struct copy_using_evaluator_impl;
+struct dense_assignment_loop;
/************************
*** Default traversal ***
************************/
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, NoUnrolling>
{
- static void run(DstXprType& dst, const SrcXprType& src)
+ static void run(DstXprType& dst, const SrcXprType& src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -292,16 +300,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
for(Index outer = 0; outer < dst.outerSize(); ++outer) {
for(Index inner = 0; inner < dst.innerSize(); ++inner) {
- dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+ kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator);
}
}
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -310,16 +318,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Compl
SrcEvaluatorType srcEvaluator(src);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -330,8 +338,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
- ::run(dstEvaluator, srcEvaluator, outer);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::InnerSizeAtCompileTime>
+ ::run(dstEvaluator, srcEvaluator, kernel, outer);
}
};
@@ -339,43 +347,50 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
*** Linear vectorization ***
***************************/
+
+// The goal of unaligned_dense_assignment_loop is simply to factor out the handling
+// of the non-vectorizable beginning and ending parts
+
template <bool IsAligned = false>
-struct unaligned_copy_using_evaluator_impl
+struct unaligned_dense_assignment_loop
{
// if IsAligned = true, then do nothing
- template <typename SrcEvaluatorType, typename DstEvaluatorType>
- static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
+ template <typename SrcEvaluatorType, typename DstEvaluatorType, typename Kernel>
+ static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, const Kernel&,
typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};
template <>
-struct unaligned_copy_using_evaluator_impl<false>
+struct unaligned_dense_assignment_loop<false>
{
// MSVC must not inline this functions. If it does, it fails to optimize the
// packet access path.
+ // FIXME check which version exhibits this issue
#ifdef _MSC_VER
- template <typename DstEvaluatorType, typename SrcEvaluatorType>
+ template <typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel>
static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
const SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel,
typename DstEvaluatorType::Index start,
typename DstEvaluatorType::Index end)
#else
- template <typename DstEvaluatorType, typename SrcEvaluatorType>
+ template <typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel>
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
const SrcEvaluatorType &srcEvaluator,
+ const Kernel &kernel,
typename DstEvaluatorType::Index start,
typename DstEvaluatorType::Index end)
#endif
{
for (typename DstEvaluatorType::Index index = start; index < end; ++index)
- dstEvaluator.copyCoeff(index, srcEvaluator);
+ kernel.assignCoeff(index, dstEvaluator, srcEvaluator);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearVectorizedTraversal, NoUnrolling>
{
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -395,22 +410,20 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
- unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
+ unaligned_dense_assignment_loop<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, kernel, 0, alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
- {
- dstEvaluator.template copyPacket<dstAlignment, srcAlignment>(index, srcEvaluator);
- }
+ kernel.template assignPacket<dstAlignment, srcAlignment>(index, dstEvaluator, srcEvaluator);
- unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size);
+ unaligned_dense_assignment_loop<>::run(dstEvaluator, srcEvaluator, kernel, alignedEnd, size);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename DstXprType::Index Index;
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -423,11 +436,11 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, alignedSize>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, alignedSize>
+ ::run(dstEvaluator, srcEvaluator, kernel);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, alignedSize, size>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, alignedSize, size>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
@@ -435,10 +448,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
*** Inner vectorization ***
**************************/
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, NoUnrolling>
{
- static inline void run(DstXprType &dst, const SrcXprType &src)
+ static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -451,16 +464,15 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
- for(Index inner = 0; inner < innerSize; inner+=packetSize) {
- dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, inner, srcEvaluator);
- }
+ for(Index inner = 0; inner < innerSize; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner, dstEvaluator, srcEvaluator);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -469,16 +481,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
SrcEvaluatorType srcEvaluator(src);
copy_using_evaluator_innervec_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -489,8 +501,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
- ::run(dstEvaluator, srcEvaluator, outer);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::InnerSizeAtCompileTime>
+ ::run(dstEvaluator, srcEvaluator, kernel, outer);
}
};
@@ -498,10 +510,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
*** Linear traversal ***
***********************/
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearTraversal, NoUnrolling>
{
- static inline void run(DstXprType &dst, const SrcXprType &src)
+ static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -512,14 +524,14 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
const Index size = dst.size();
for(Index i = 0; i < size; ++i)
- dstEvaluator.copyCoeff(i, srcEvaluator);
+ kernel.assignCoeff(i, dstEvaluator, srcEvaluator);
}
};
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -528,8 +540,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, Comple
SrcEvaluatorType srcEvaluator(src);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
- ::run(dstEvaluator, srcEvaluator);
+ <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime>
+ ::run(dstEvaluator, srcEvaluator, kernel);
}
};
@@ -537,10 +549,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, Comple
*** Slice vectorization ***
***************************/
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, SliceVectorizedTraversal, NoUnrolling>
{
- static inline void run(DstXprType &dst, const SrcXprType &src)
+ static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -566,19 +578,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
{
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
// do the non-vectorizable part of the assignment
- for(Index inner = 0; inner<alignedStart ; ++inner) {
- dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
- }
+ for(Index inner = 0; inner<alignedStart ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator);
// do the vectorizable part of the assignment
- for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
- dstEvaluator.template copyPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, srcEvaluator);
- }
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, dstEvaluator, srcEvaluator);
// do the non-vectorizable part of the assignment
- for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
- dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
- }
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator);
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
}
@@ -589,10 +598,12 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
*** All-at-once traversal ***
****************************/
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoUnrolling>
+// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
+// Indeed, what to do with the kernel??
+template<typename DstXprType, typename SrcXprType, typename Kernel>
+struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, AllAtOnceTraversal, NoUnrolling>
{
- static inline void run(DstXprType &dst, const SrcXprType &src)
+ static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &/*kernel*/)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -601,23 +612,126 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
SrcEvaluatorType srcEvaluator(src);
// Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
- // TODO: Do not pass the xpr object to evalTo()
+ // TODO: Do not pass the xpr object to evalTo() (Jitse)
srcEvaluator.evalTo(dstEvaluator, dst);
}
};
/***************************************************************************
-* Part 4 : Entry points
+* Part 4 : Generic Assignment routine
+***************************************************************************/
+
+// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
+// to another dense, writable evaluator.
+// It is parametrized by the actual assignment functor. This abstraction level permits
+// keeping the evaluation loops as simple and as generic as possible.
+// One can customize the assignment by using this generic dense_assignment_kernel with different
+// functors, or by overloading it completely, bypassing the functor.
+// FIXME: This kernel could also hold the destination and source evaluators,
+// thus simplifying the dense_assignment_loop prototypes. (Gael)
+template<typename Functor>
+struct generic_dense_assignment_kernel
+{
+ const Functor &m_functor;
+ generic_dense_assignment_kernel(const Functor &func) : m_functor(func) {}
+
+ template<typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignCoeff(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.assignCoeff(dst.coeffRef(row,col), src.coeff(row,col));
+ }
+
+ template<typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignCoeff(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.assignCoeff(dst.coeffRef(index), src.coeff(index));
+ }
+
+ template<typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignCoeffByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ typedef typename DstEvaluatorType::Index Index;
+ Index row = rowIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ Index col = colIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ assignCoeff(row, col, dst, src);
+ }
+
+
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.assignPacket<StoreMode>(&dst.coeffRef(row,col), src.template packet<LoadMode>(row,col));
+ }
+
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.assignPacket<StoreMode>(&dst.coeffRef(index), src.template packet<LoadMode>(index));
+ }
+
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ typedef typename DstEvaluatorType::Index Index;
+ Index row = rowIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ Index col = colIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ assignPacket<StoreMode,LoadMode>(row, col, dst, src);
+ }
+
+ template<typename EvaluatorType, typename Index>
+ static Index rowIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename EvaluatorType::ExpressionTraits Traits;
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
+ : int(Traits::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+
+ template<typename EvaluatorType, typename Index>
+ static Index colIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename EvaluatorType::ExpressionTraits Traits;
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
+ : int(Traits::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+};
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+#ifdef EIGEN_DEBUG_ASSIGN
+ internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
+#endif
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ typedef generic_dense_assignment_kernel<Functor> Kernel;
+ Kernel kernel(func);
+
+ dense_assignment_loop<DstXprType, SrcXprType, Kernel>::run(const_cast<DstXprType&>(dst), src, kernel);
+}
+
+template<typename DstXprType, typename SrcXprType>
+void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+{
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
+}
+
+/***************************************************************************
+* Part 5 : Entry points
***************************************************************************/
// Based on DenseBase::LazyAssign()
+// The following functions are just for testing; they are meant to be moved to operator= and the like.
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
const EigenBase<SrcXprType>& src)
{
- return noalias_copy_using_evaluator(dst.expression(), src.derived());
+ return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
}
template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
@@ -641,57 +755,90 @@ struct AddEvalIfAssumingAliasing<XprType, 1>
}
};
-template<typename DstXprType, typename SrcXprType>
+template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
return noalias_copy_using_evaluator(dst.const_cast_derived(),
- AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()));
+ AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
+ func
+ );
}
+// this mimics operator=
template<typename DstXprType, typename SrcXprType>
EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+{
+ return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+}
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_STRONG_INLINE
+const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
#ifdef EIGEN_DEBUG_ASSIGN
internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
#endif
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
- : (dst.rows() == src.rows() && dst.cols() == src.cols())))
- && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+ : (dst.rows() == src.rows() && dst.cols() == src.cols())))
+ && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#else
dst.const_cast_derived().resizeLike(src.derived());
#endif
- return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
+ call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
+ return dst.derived();
}
-template<typename DstXprType, typename SrcXprType>
+template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
{
- return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src)
-{
-#ifdef EIGEN_DEBUG_ASSIGN
- internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
- copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src);
- return dst;
+ call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
+ return dst.derived();
}
// Based on DenseBase::swap()
-// TODO: Chech whether we need to do something special for swapping two
-// Arrays or Matrices.
+// TODO: Check whether we need to do something special for swapping two
+// Arrays or Matrices. (Jitse)
+// Overload default assignPacket behavior for swapping them
+template<typename Scalar>
+struct swap_kernel : generic_dense_assignment_kernel<swap_assign_op<Scalar> >
+{
+ typedef generic_dense_assignment_kernel<swap_assign_op<Scalar> > Base;
+ using Base::m_functor;
+ swap_kernel() : Base(swap_assign_op<Scalar>()) {}
+
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,typename DstEvaluatorType::PacketScalar>(&dst.coeffRef(row,col), &const_cast<SrcEvaluatorType&>(src).coeffRef(row,col));
+ }
+
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,typename DstEvaluatorType::PacketScalar>(&dst.coeffRef(index), &const_cast<SrcEvaluatorType&>(src).coeffRef(index));
+ }
+
+ // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
+ template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType>
+ void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const
+ {
+ typedef typename DstEvaluatorType::Index Index;
+ Index row = Base::template rowIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ Index col = Base::template colIndexByOuterInner<DstEvaluatorType>(outer, inner);
+ assignPacket<StoreMode,LoadMode>(row, col, dst, src);
+ }
+};
+
template<typename DstXprType, typename SrcXprType>
void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
- copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src);
+ typedef swap_kernel<typename DstXprType::Scalar> kernel;
+ dense_assignment_loop<DstXprType, SrcXprType, kernel>::run(const_cast<DstXprType&>(dst), src, kernel());
}
// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
@@ -699,8 +846,7 @@ template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
}
// Based on ArrayBase::operator+=
@@ -708,42 +854,37 @@ template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
}
-// TODO: Add add_assign_using_evaluator for EigenBase ?
+// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
typedef typename DstXprType::Scalar Scalar;
- SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
- copy_using_evaluator(tmp, src.derived());
+ copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
}
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 3240ec6ed..082c00df4 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -78,6 +78,8 @@ template<typename ExpressionType>
struct evaluator_impl_base
{
typedef typename ExpressionType::Index Index;
+ // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
+ typedef traits<ExpressionType> ExpressionTraits;
template<typename OtherEvaluatorType>
void copyCoeff(Index row, Index col, const OtherEvaluatorType& other)
@@ -307,15 +309,17 @@ struct evaluator_impl<EvalToTemp<ArgType> >
evaluator_impl(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result)
- {
- copy_using_evaluator_without_resizing(m_result, xpr.arg());
+ {
+ // TODO we should simply do m_result(xpr.arg());
+ call_dense_assignment_loop(m_result, xpr.arg());
}
// This constructor is used when nesting an EvalTo evaluator in another evaluator
evaluator_impl(const ArgType& arg)
: m_result(arg.rows(), arg.cols()), m_resultImpl(m_result)
- {
- copy_using_evaluator_without_resizing(m_result, arg);
+ {
+ // TODO we should simply do m_result(xpr.arg());
+ call_dense_assignment_loop(m_result, arg);
}
typedef typename PlainObject::Index Index;
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
new file mode 100644
index 000000000..ae264aa64
--- /dev/null
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -0,0 +1,167 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
+#define EIGEN_ASSIGNMENT_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment
+ *
+ */
+template<typename Scalar> struct assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::IsVectorized
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with addition
+ *
+ */
+template<typename Scalar> struct add_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<add_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with subtraction
+ *
+ */
+template<typename Scalar> struct sub_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<sub_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with multiplication
+ *
+ */
+template<typename Scalar> struct mul_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<mul_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasMul
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with division
+ *
+ */
+template<typename Scalar> struct div_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<div_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasMul
+ };
+};
+
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with swapping
+ *
+ * It works as follows. For a non-vectorized evaluation loop, we have:
+ *   for(i) func(A.coeffRef(i), B.coeff(i));
+ * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef.
+ * Actually, SwapWrapper might not even be needed: even if B is a plain expression, since it has to be writable,
+ * B.coeff already returns a const reference to the underlying scalar value.
+ *
+ * The case of a vectorized loop is more tricky:
+ * for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
+ * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
+ * the actual alignment and Packet type.
+ *
+ */
+template<typename Scalar> struct swap_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+ {
+ using std::swap;
+ swap(a,const_cast<Scalar&>(b));
+ }
+
+ template<int LhsAlignment, int RhsAlignment, typename Packet>
+ EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
+ {
+ Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
+ internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
+ internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
+ }
+};
+template<typename Scalar>
+struct functor_traits<swap_assign_op<Scalar> > {
+ enum {
+ Cost = 3 * NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::IsVectorized
+ };
+};
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
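
All functors in the new AssignmentFunctors.h follow the same two-method protocol: assignCoeff for a single scalar and a templated assignPacket for a SIMD packet. As a hedged illustration of the commit message's point that operations can now be specialized outside Eigen, here is a hypothetical "clamped add" functor written against that protocol. The Packet4 type and the load4/store4 helpers are stand-ins so the snippet compiles without Eigen's internal ploadt/pstoret; none of this is part of the patch itself.

#include <algorithm>
#include <iostream>

// A fake 4-wide "packet" so the example does not depend on Eigen's internal
// packet primitives; in real Eigen code this would be e.g. Packet4f/Packet2d.
struct Packet4 { double v[4]; };

Packet4 load4(const double* p)              { return Packet4{{p[0], p[1], p[2], p[3]}}; }
void    store4(double* p, const Packet4& x) { std::copy(x.v, x.v + 4, p); }

// Hypothetical user-defined assignment functor following the same protocol
// as assign_op/add_assign_op above: one scalar path, one packet path.
struct clamped_add_assign_op {
  double lo, hi;

  void assignCoeff(double& a, const double& b) const
  { a = std::min(hi, std::max(lo, a + b)); }

  template<int Alignment>   // alignment hint, ignored in this scalar emulation
  void assignPacket(double* a, const Packet4& b) const
  {
    Packet4 r = load4(a);
    for (int k = 0; k < 4; ++k) r.v[k] = std::min(hi, std::max(lo, r.v[k] + b.v[k]));
    store4(a, r);
  }
};

int main() {
  double dst[4]       = {0.5, 0.9, 0.2, 0.7};
  const double src[4] = {0.8, 0.8, 0.8, 0.8};

  clamped_add_assign_op op{0.0, 1.0};
  op.assignCoeff(dst[0], src[0]);              // scalar path on one coefficient
  op.assignPacket<0>(&dst[0], load4(src));     // packet path on a whole "packet"

  for (double x : dst) std::cout << x << ' ';  // prints: 1 1 1 1
  std::cout << '\n';
}
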
diff --git a/test/evaluator_common.h b/test/evaluator_common.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/test/evaluator_common.h