Diffstat (limited to 'Eigen')
-rw-r--r--  Eigen/Core                                          |  96
-rw-r--r--  Eigen/Eigen2Support                                 |   6
-rw-r--r--  Eigen/src/Array/Array.h                             |   3
-rw-r--r--  Eigen/src/Array/ArrayWrapper.h                      |   6
-rw-r--r--  Eigen/src/Array/Reverse.h                           |   2
-rw-r--r--  Eigen/src/Cholesky/LDLT.h                           |  74
-rw-r--r--  Eigen/src/Cholesky/LLT.h                            |  14
-rw-r--r--  Eigen/src/Core/Assign.h                             | 145
-rw-r--r--  Eigen/src/Core/Block.h                              |  30
-rw-r--r--  Eigen/src/Core/Coeffs.h                             |  73
-rw-r--r--  Eigen/src/Core/DenseBase.h                          | 126
-rw-r--r--  Eigen/src/Core/DenseStorageBase.h                   |  90
-rw-r--r--  Eigen/src/Core/Dot.h                                | 232
-rw-r--r--  Eigen/src/Core/Flagged.h                            |   3
-rw-r--r--  Eigen/src/Core/ForceAlignedAccess.h                 |   3
-rw-r--r--  Eigen/src/Core/Functors.h                           |   2
-rw-r--r--  Eigen/src/Core/Map.h                                | 115
-rw-r--r--  Eigen/src/Core/MapBase.h                            |  65
-rw-r--r--  Eigen/src/Core/Matrix.h                             |   8
-rw-r--r--  Eigen/src/Core/MatrixStorage.h                      |  44
-rw-r--r--  Eigen/src/Core/NestByValue.h                        |   3
-rw-r--r--  Eigen/src/Core/PermutationMatrix.h                  | 188
-rw-r--r--  Eigen/src/Core/Product.h                            |  18
-rw-r--r--  Eigen/src/Core/Redux.h                              |  38
-rw-r--r--  Eigen/src/Core/ReturnByValue.h                      |  11
-rw-r--r--  Eigen/src/Core/SelfAdjointView.h                    |   5
-rw-r--r--  Eigen/src/Core/SelfCwiseBinaryOp.h                  |   3
-rw-r--r--  Eigen/src/Core/Stride.h                             | 113
-rw-r--r--  Eigen/src/Core/Swap.h                               |  11
-rw-r--r--  Eigen/src/Core/Transpose.h                          |  22
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h                   |   6
-rw-r--r--  Eigen/src/Core/VectorBlock.h                        |   1
-rw-r--r--  Eigen/src/Core/arch/AltiVec/PacketMath.h            |   5
-rw-r--r--  Eigen/src/Core/arch/CMakeLists.txt                  |   3
-rw-r--r--  Eigen/src/Core/arch/Default/Settings.h              |  65
-rw-r--r--  Eigen/src/Core/arch/NEON/CMakeLists.txt             |   6
-rw-r--r--  Eigen/src/Core/arch/NEON/PacketMath.h               | 372
-rw-r--r--  Eigen/src/Core/arch/SSE/PacketMath.h                |   8
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h         |  20
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h   |   4
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h   |  26
-rw-r--r--  Eigen/src/Core/util/BlasUtil.h                      |  18
-rw-r--r--  Eigen/src/Core/util/Constants.h                     |  12
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h           |  11
-rw-r--r--  Eigen/src/Core/util/Macros.h                        |  28
-rw-r--r--  Eigen/src/Core/util/Memory.h                        | 301
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h                  |   7
-rw-r--r--  Eigen/src/Core/util/XprHelper.h                     |  23
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur.h                |  46
-rw-r--r--  Eigen/src/Geometry/Scaling.h                        |  27
-rw-r--r--  Eigen/src/LU/Determinant.h                          |   5
-rw-r--r--  Eigen/src/LU/FullPivLU.h                            |  40
-rw-r--r--  Eigen/src/LU/Inverse.h                              |   2
-rw-r--r--  Eigen/src/LU/PartialPivLU.h                         |  23
-rw-r--r--  Eigen/src/Sparse/CholmodSupport.h                   |   4
-rw-r--r--  Eigen/src/Sparse/DynamicSparseMatrix.h              |   2
-rw-r--r--  Eigen/src/Sparse/SuperLUSupport.h                   |  12
57 files changed, 1824 insertions(+), 802 deletions(-)
diff --git a/Eigen/Core b/Eigen/Core
index 26195cd35..075f95e5a 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -61,20 +61,45 @@
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+
+ // Defines symbols for compile-time detection of which instructions are
+ // used.
+ // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
+ #define EIGEN_VECTORIZE_SSE2
+
+ // Detect sse3/ssse3/sse4:
+ // gcc and icc define __SSE3__, etc., but there is no way to detect these
+ // on msvc. You can define EIGEN_VECTORIZE_SSE* yourself if you
+ // want to force the use of those instructions with msvc.
+ #ifdef __SSE3__
+ #define EIGEN_VECTORIZE_SSE3
+ #endif
+ #ifdef __SSSE3__
+ #define EIGEN_VECTORIZE_SSSE3
+ #endif
+ #ifdef __SSE4_1__
+ #define EIGEN_VECTORIZE_SSE4_1
+ #endif
+ #ifdef __SSE4_2__
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+
+ // include files
+
#include <emmintrin.h>
#include <xmmintrin.h>
- #ifdef __SSE3__
+ #ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
- #ifdef __SSSE3__
+ #ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
- #ifdef __SSE4_1__
+ #ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
- #ifdef __SSE4_2__
+ #ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#elif defined __ALTIVEC__
@@ -86,6 +111,10 @@
#undef bool
#undef vector
#undef pixel
+ #elif defined __ARM_NEON__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_NEON
+ #include <arm_neon.h>
#endif
#endif
@@ -97,18 +126,24 @@
#include <omp.h>
#endif
+#include <cerrno>
#include <cstdlib>
#include <cmath>
#include <complex>
#include <cassert>
#include <functional>
-#include <iostream>
+#include <iosfwd>
#include <cstring>
#include <string>
#include <limits>
// for min/max:
#include <algorithm>
+// for outputting debug info
+#ifdef EIGEN_DEBUG_ASSIGN
+#include <iostream>
+#endif
+
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
#define EIGEN_EXCEPTIONS
#endif
@@ -129,6 +164,26 @@
namespace Eigen {
+inline static const char *SimdInstructionSetsInUse(void) {
+#if defined(EIGEN_VECTORIZE_SSE4_2)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+ return "SSE, SSE2, SSE3, SSSE3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+ return "SSE, SSE2, SSE3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+ return "SSE, SSE2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+ return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_NEON)
+ return "ARM NEON";
+#else
+ return "None";
+#endif
+}
+
// we use size_t frequently and we'll never remember to prepend it with std:: every time just to
// ensure QNX/QCC support
using std::size_t;
@@ -163,11 +218,11 @@ struct Dense {};
#include "src/Core/arch/SSE/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_ALTIVEC
#include "src/Core/arch/AltiVec/PacketMath.h"
+#elif defined EIGEN_VECTORIZE_NEON
+ #include "src/Core/arch/NEON/PacketMath.h"
#endif
-#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
-#endif
+#include "src/Core/arch/Default/Settings.h"
#include "src/Core/Functors.h"
#include "src/Core/DenseBase.h"
@@ -196,6 +251,7 @@ struct Dense {};
#include "src/Core/Dot.h"
#include "src/Core/StableNorm.h"
#include "src/Core/MapBase.h"
+#include "src/Core/Stride.h"
#include "src/Core/Map.h"
#include "src/Core/Block.h"
#include "src/Core/VectorBlock.h"
@@ -231,28 +287,6 @@ struct Dense {};
#include "src/Core/products/TriangularSolverMatrix.h"
#include "src/Core/BandMatrix.h"
-/** \defgroup Array_Module Array module
- * \ingroup Core_Module
- * This module provides several handy features to manipulate matrices as simple array of values.
- * In addition to listed classes, it defines various methods of the Cwise interface
- * (accessible from MatrixBase::cwise()), including:
- * - matrix-scalar sum,
- * - coeff-wise comparison operators,
- * - sin, cos, sqrt, pow, exp, log, square, cube, inverse (reciprocal).
- *
- * This module also provides various MatrixBase methods, including:
- * - boolean reductions: \ref MatrixBase::all() "all", \ref MatrixBase::any() "any", \ref MatrixBase::count() "count",
- * - \ref MatrixBase::Random() "random matrix initialization",
- * - a \ref MatrixBase::select() "select" function mimicking the trivariate ?: operator,
- * - \ref MatrixBase::colwise() "column-wise" and \ref MatrixBase::rowwise() "row-wise" reductions,
- * - \ref MatrixBase::reverse() "matrix reverse",
- * - \ref MatrixBase::lpNorm() "generic matrix norm".
- *
- * \code
- * #include <Eigen/Core>
- * \endcode
- */
-
#include "src/Array/Functors.h"
#include "src/Array/BooleanRedux.h"
#include "src/Array/Select.h"
diff --git a/Eigen/Eigen2Support b/Eigen/Eigen2Support
index bd6306aff..26b547b9b 100644
--- a/Eigen/Eigen2Support
+++ b/Eigen/Eigen2Support
@@ -26,7 +26,7 @@
#define EIGEN2SUPPORT_H
#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
-#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before any other Eigen header
+#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
#endif
#include "src/Core/util/DisableMSVCWarnings.h"
@@ -36,6 +36,7 @@ namespace Eigen {
/** \defgroup Eigen2Support_Module Eigen2 support module
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
*
+ * To use it, define EIGEN2_SUPPORT before including any Eigen header
* \code
* #define EIGEN2_SUPPORT
* \endcode
@@ -51,4 +52,7 @@ namespace Eigen {
#include "src/Core/util/EnableMSVCWarnings.h"
+// Eigen2 used to include iostream
+#include <iostream>
+
#endif // EIGEN2SUPPORT_H
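
// A minimal sketch (not part of the patch) of what the fixed #error message and the
// new documentation line prescribe: EIGEN2_SUPPORT must be defined before the first
// Eigen include, and since <Eigen/Core> no longer pulls in <iostream>, the
// compatibility module restores it for legacy code.
#define EIGEN2_SUPPORT
#include <Eigen/Core>

int main()
{
  Eigen::Matrix2d m = Eigen::Matrix2d::Identity();
  std::cout << m << std::endl;  // <iostream> is available again via Eigen2Support
  return 0;
}
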
diff --git a/Eigen/src/Array/Array.h b/Eigen/src/Array/Array.h
index 5a398d849..91a091152 100644
--- a/Eigen/src/Array/Array.h
+++ b/Eigen/src/Array/Array.h
@@ -213,6 +213,9 @@ class Array
void swap(ArrayBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
{ this->_swap(other.derived()); }
+ inline int innerStride() const { return 1; }
+ inline int outerStride() const { return this->innerSize(); }
+
#ifdef EIGEN_ARRAY_PLUGIN
#include EIGEN_ARRAY_PLUGIN
#endif
diff --git a/Eigen/src/Array/ArrayWrapper.h b/Eigen/src/Array/ArrayWrapper.h
index 75bc33770..0075dd537 100644
--- a/Eigen/src/Array/ArrayWrapper.h
+++ b/Eigen/src/Array/ArrayWrapper.h
@@ -55,7 +55,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline int rows() const { return m_expression.rows(); }
inline int cols() const { return m_expression.cols(); }
- inline int stride() const { return m_expression.stride(); }
+ inline int outerStride() const { return m_expression.outerStride(); }
+ inline int innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(int row, int col) const
{
@@ -139,7 +140,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline int rows() const { return m_expression.rows(); }
inline int cols() const { return m_expression.cols(); }
- inline int stride() const { return m_expression.stride(); }
+ inline int outerStride() const { return m_expression.outerStride(); }
+ inline int innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(int row, int col) const
{
diff --git a/Eigen/src/Array/Reverse.h b/Eigen/src/Array/Reverse.h
index a405fbb4b..07b9f77b7 100644
--- a/Eigen/src/Array/Reverse.h
+++ b/Eigen/src/Array/Reverse.h
@@ -81,11 +81,11 @@ template<typename MatrixType, int Direction> class Reverse
typedef typename MatrixType::template MakeBase< Reverse<MatrixType, Direction> >::Type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
+ using Base::IsRowMajor;
protected:
enum {
PacketSize = ei_packet_traits<Scalar>::size,
- IsRowMajor = Flags & RowMajorBit,
IsColMajor = !IsRowMajor,
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 4d3149d42..8699fe7e0 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -62,14 +62,21 @@ template<typename _MatrixType> class LDLT
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
typedef Matrix<int, 1, MatrixType::RowsAtCompileTime> IntRowVectorType;
- /**
- * \brief Default Constructor.
- *
- * The default constructor is useful in cases in which the user intends to
- * perform decompositions via LDLT::compute(const MatrixType&).
- */
+ /** \brief Default Constructor.
+ *
+ * The default constructor is useful in cases in which the user intends to
+ * perform decompositions via LDLT::compute(const MatrixType&).
+ */
LDLT() : m_matrix(), m_p(), m_transpositions(), m_isInitialized(false) {}
+ /** \brief Default Constructor with memory preallocation
+ *
+ * Like the default constructor but with preallocation of the internal data
+ * according to the specified problem \a size.
+ * \sa LDLT()
+ */
+ LDLT(int size) : m_matrix(size,size), m_p(size), m_transpositions(size), m_isInitialized(false) {}
+
LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_p(matrix.rows()),
@@ -148,6 +155,8 @@ template<typename _MatrixType> class LDLT
return m_matrix;
}
+ MatrixType reconstructedMatrix() const;
+
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
@@ -175,6 +184,10 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
m_matrix = a;
+ m_p.resize(size);
+ m_transpositions.resize(size);
+ m_isInitialized = false;
+
if (size <= 1) {
m_p.setZero();
m_transpositions.setZero();
@@ -202,11 +215,8 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
{
// The biggest overall is the point of reference to which further diagonals
// are compared; if any diagonal is negligible compared
- // to the largest overall, the algorithm bails. This cutoff is suggested
- // in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
- // Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
- // Algorithms" page 217, also by Higham.
- cutoff = ei_abs(NumTraits<Scalar>::epsilon() * RealScalar(size) * biggest_in_corner);
+ // to the largest overall, the algorithm bails.
+ cutoff = ei_abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
}
@@ -231,26 +241,21 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
continue;
}
- RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j)
- .dot(m_matrix.col(j).head(j)));
+ RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j).dot(m_matrix.col(j).head(j)));
m_matrix.coeffRef(j,j) = Djj;
- // Finish early if the matrix is not full rank.
- if(ei_abs(Djj) < cutoff)
- {
- for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
- break;
- }
-
int endSize = size - j - 1;
if (endSize > 0) {
_temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
* m_matrix.col(j).head(j).conjugate();
m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
- - _temporary.tail(endSize).transpose();
+ - _temporary.tail(endSize).transpose();
- m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
+ if(ei_abs(Djj) > cutoff)
+ {
+ m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
+ }
}
}
@@ -315,6 +320,31 @@ bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
return true;
}
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^T L D L^* P.
+ * This function is provided for debug purposes. */
+template<typename MatrixType>
+MatrixType LDLT<MatrixType>::reconstructedMatrix() const
+{
+ ei_assert(m_isInitialized && "LDLT is not initialized.");
+ const int size = m_matrix.rows();
+ MatrixType res(size,size);
+ res.setIdentity();
+
+ // PI
+ for(int i = 0; i < size; ++i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
+ // L^* P
+ res = matrixL().adjoint() * res;
+ // D(L^*P)
+ res = vectorD().asDiagonal() * res;
+ // L(DL^*P)
+ res = matrixL() * res;
+ // P^T (LDL^*P)
+ for (int i = size-1; i >= 0; --i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
+
+ return res;
+}
+
/** \cholesky_module
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
*/
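
// A minimal sketch (not part of the patch) exercising the two LDLT additions: the
// preallocating constructor and reconstructedMatrix() as a debugging aid.
#include <Eigen/Core>
#include <Eigen/Cholesky>
#include <iostream>

int main()
{
  const int n = 4;
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(n, n);
  Eigen::MatrixXd a = m * m.transpose();  // symmetric positive semi-definite

  Eigen::LDLT<Eigen::MatrixXd> ldlt(n);   // preallocate internal data for size n
  ldlt.compute(a);

  // reconstructedMatrix() returns P^T L D L^* P, so the residual should be at
  // the level of machine precision.
  std::cout << "LDLT residual: "
            << (a - ldlt.reconstructedMatrix()).norm() << std::endl;
  return 0;
}
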
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 474b82406..2e8df7661 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -117,7 +117,7 @@ template<typename _MatrixType, int _UpLo> class LLT
&& "LLT::solve(): invalid number of rows of the right hand side matrix b");
return ei_solve_retval<LLT, Rhs>(*this, b.derived());
}
-
+
template<typename Derived>
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
@@ -133,6 +133,8 @@ template<typename _MatrixType, int _UpLo> class LLT
return m_matrix;
}
+ MatrixType reconstructedMatrix() const;
+
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
@@ -295,6 +297,16 @@ bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
return true;
}
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: L L^*.
+ * This function is provided for debug purposes. */
+template<typename MatrixType, int _UpLo>
+MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
+{
+ ei_assert(m_isInitialized && "LLT is not initialized.");
+ return matrixL() * matrixL().adjoint().toDenseMatrix();
+}
+
/** \cholesky_module
* \returns the LLT decomposition of \c *this
*/
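
// The analogous sanity check for LLT (again a sketch, not part of the patch);
// the shift by the identity keeps the test matrix safely positive definite.
#include <Eigen/Core>
#include <Eigen/Cholesky>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd a = m * m.transpose() + 4.0 * Eigen::MatrixXd::Identity(4, 4);
  Eigen::LLT<Eigen::MatrixXd> llt(a);
  std::cout << "LLT residual: "
            << (a - llt.reconstructedMatrix()).norm() << std::endl;
  return 0;
}
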
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 9440cebf1..eb7bca1da 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
@@ -37,34 +37,35 @@ struct ei_assign_traits
public:
enum {
DstIsAligned = Derived::Flags & AlignedBit,
+ DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
- SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
+ JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
};
private:
enum {
- InnerSize = int(Derived::Flags)&RowMajorBit
- ? Derived::ColsAtCompileTime
- : Derived::RowsAtCompileTime,
- InnerMaxSize = int(Derived::Flags)&RowMajorBit
- ? Derived::MaxColsAtCompileTime
- : Derived::MaxRowsAtCompileTime,
- MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxColsAtCompileTime,Derived::MaxRowsAtCompileTime>::ret,
+ InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+ : int(Derived::RowsAtCompileTime),
+ InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+ : int(Derived::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
enum {
- StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
+ StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
MightVectorize = StorageOrdersAgree
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
- MayLinearVectorize = MightVectorize && MayLinearize
- && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+ MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+ && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
- MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
+ MaySliceVectorize = MightVectorize && DstHasDirectAccess && int(InnerMaxSize)>=3*PacketSize
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */
@@ -104,16 +105,18 @@ public:
: int(NoUnrolling)
};
+#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
- EIGEN_DEBUG_VAR(SrcAlignment)
+ EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
@@ -123,6 +126,7 @@ public:
EIGEN_DEBUG_VAR(MayUnrollInner)
EIGEN_DEBUG_VAR(Unrolling)
}
+#endif
};
/***************************************************************************
@@ -137,17 +141,13 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling
{
enum {
- row = int(Derived1::Flags)&RowMajorBit
- ? Index / int(Derived1::ColsAtCompileTime)
- : Index % Derived1::RowsAtCompileTime,
- col = int(Derived1::Flags)&RowMajorBit
- ? Index % int(Derived1::ColsAtCompileTime)
- : Index / Derived1::RowsAtCompileTime
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- dst.copyCoeff(row, col, src);
+ dst.copyCoeffByOuterInner(outer, inner, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
@@ -161,13 +161,10 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, St
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int row = rowMajor ? row_or_col : Index;
- const int col = rowMajor ? Index : row_or_col;
- dst.copyCoeff(row, col, src);
- ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
+ dst.copyCoeffByOuterInner(outer, Index, src);
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
}
};
@@ -205,18 +202,14 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_CompleteUnrolling
{
enum {
- row = int(Derived1::Flags)&RowMajorBit
- ? Index / int(Derived1::ColsAtCompileTime)
- : Index % Derived1::RowsAtCompileTime,
- col = int(Derived1::Flags)&RowMajorBit
- ? Index % int(Derived1::ColsAtCompileTime)
- : Index / Derived1::RowsAtCompileTime,
- SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime,
+ JointAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
+ dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
}
@@ -231,13 +224,11 @@ struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
- const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
- const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
- dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
- Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
+ Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
}
};
@@ -267,14 +258,9 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- for(int i = 0; i < innerSize; ++i)
- {
- if(int(Derived1::Flags)&RowMajorBit)
- dst.copyCoeff(j, i, src);
- else
- dst.copyCoeff(i, j, src);
- }
+ for(int outer = 0; outer < outerSize; ++outer)
+ for(int inner = 0; inner < innerSize; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
}
};
@@ -293,12 +279,10 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
- ::run(dst, src, j);
+ for(int outer = 0; outer < outerSize; ++outer)
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
}
};
@@ -339,14 +323,9 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
- for(int j = 0; j < outerSize; ++j)
- for(int i = 0; i < innerSize; i+=packetSize)
- {
- if(int(Derived1::Flags)&RowMajorBit)
- dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
- else
- dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
- }
+ for(int outer = 0; outer < outerSize; ++outer)
+ for(int inner = 0; inner < innerSize; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
}
};
@@ -365,12 +344,10 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
- ::run(dst, src, j);
+ for(int outer = 0; outer < outerSize; ++outer)
+ ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
}
};
@@ -406,7 +383,7 @@ struct ei_unaligned_assign_impl<false>
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
- inline static void run(Derived1 &dst, const Derived2 &src)
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
const int size = dst.size();
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
@@ -418,7 +395,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
for(int index = alignedStart; index < alignedEnd; index += packetSize)
{
- dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
+ dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src);
}
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
@@ -452,40 +429,24 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
const int packetAlignedMask = packetSize - 1;
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
- const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
+ const int alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask;
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
- for(int i = 0; i < outerSize; ++i)
+ for(int outer = 0; outer < outerSize; ++outer)
{
const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-
// do the non-vectorizable part of the assignment
- for (int index = 0; index<alignedStart ; ++index)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.copyCoeff(i, index, src);
- else
- dst.copyCoeff(index, i, src);
- }
+ for(int inner = 0; inner<alignedStart ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
// do the vectorizable part of the assignment
- for (int index = alignedStart; index<alignedEnd; index+=packetSize)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
- else
- dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
- }
+ for(int inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment
- for (int index = alignedEnd; index<innerSize ; ++index)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.copyCoeff(i, index, src);
- else
- dst.copyCoeff(index, i, src);
- }
+ for(int inner = alignedEnd; inner<innerSize ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
alignedStart = std::min<int>((alignedStart+alignedStep)%packetSize, innerSize);
}
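
// A sketch (not part of the patch) of the debugging hook the new #ifdef guards:
// defining EIGEN_DEBUG_ASSIGN before including Eigen pulls in <iostream> and lets
// one dump the traversal/unrolling decision, including the new MayLinearize and
// JointAlignment values, for a given pair of expression types.
#define EIGEN_DEBUG_ASSIGN
#include <Eigen/Core>

int main()
{
  // Prints DstIsAligned, MayLinearize, Unrolling, ... for this assignment.
  Eigen::ei_assign_traits<Eigen::Matrix4f, Eigen::Matrix4f>::debug();
  return 0;
}
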
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 3b4234c22..e6cfb0859 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -100,8 +100,8 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
// The case of a 1x1 matrix seems ambiguous, but the result is the same anyway.
m_startRow( (BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) ? i : 0),
m_startCol( (BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
- m_blockRows(matrix.rows()), // if it is a row, then m_blockRows has a fixed-size of 1, so no pb to try to overwrite it
- m_blockCols(matrix.cols()) // same for m_blockCols
+ m_blockRows(BlockRows==1 ? 1 : matrix.rows()),
+ m_blockCols(BlockCols==1 ? 1 : matrix.cols())
{
ei_assert( (i>=0) && (
((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i<matrix.rows())
@@ -112,7 +112,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
*/
inline Block(const MatrixType& matrix, int startRow, int startCol)
: m_matrix(matrix), m_startRow(startRow), m_startCol(startCol),
- m_blockRows(matrix.rows()), m_blockCols(matrix.cols())
+ m_blockRows(BlockRows), m_blockCols(BlockCols)
{
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
@@ -196,8 +196,8 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \sa MapBase::data() */
inline const Scalar* data() const;
- /** \sa MapBase::stride() */
- inline int stride() const;
+ inline int innerStride() const;
+ inline int outerStride() const;
#endif
protected:
@@ -260,17 +260,23 @@ class Block<MatrixType,BlockRows,BlockCols,HasDirectAccess>
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= matrix.cols());
}
- /** \sa MapBase::stride() */
- inline int stride() const
+ /** \sa MapBase::innerStride() */
+ inline int innerStride() const
{
- return ((!Base::IsVectorAtCompileTime)
- || (BlockRows==1 && ((Flags&RowMajorBit)==0))
- || (BlockCols==1 && ((Flags&RowMajorBit)==RowMajorBit)))
- ? m_matrix.stride() : 1;
+ return RowsAtCompileTime==1 ? m_matrix.colStride()
+ : ColsAtCompileTime==1 ? m_matrix.rowStride()
+ : m_matrix.innerStride();
+ }
+
+ /** \sa MapBase::outerStride() */
+ inline int outerStride() const
+ {
+ return IsVectorAtCompileTime ? this->size() : m_matrix.outerStride();
}
#ifndef __SUNPRO_CC
// FIXME sunstudio is not friendly with the above friend...
+ // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
protected:
#endif
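
// A sketch (not part of the patch) of what the new Block stride accessors report
// for a block of a plain column-major matrix.
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(4, 5);  // column-major by default
  // Within the block, stepping down a column moves by 1 element, while stepping
  // to the next column moves by 4 elements (the outer stride of the 4x5 matrix).
  std::cout << m.block(1, 1, 2, 3).innerStride() << std::endl;  // 1
  std::cout << m.block(1, 1, 2, 3).outerStride() << std::endl;  // 4
  return 0;
}
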
diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h
index ebfd0c80e..da7b9153f 100644
--- a/Eigen/src/Core/Coeffs.h
+++ b/Eigen/src/Core/Coeffs.h
@@ -25,6 +25,24 @@
#ifndef EIGEN_COEFFS_H
#define EIGEN_COEFFS_H
+template<typename Derived>
+EIGEN_STRONG_INLINE int DenseBase<Derived>::rowIndexByOuterInner(int outer, int inner)
+{
+ return int(Derived::RowsAtCompileTime) == 1 ? 0
+ : int(Derived::ColsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? outer
+ : inner;
+}
+
+template<typename Derived>
+EIGEN_STRONG_INLINE int DenseBase<Derived>::colIndexByOuterInner(int outer, int inner)
+{
+ return int(Derived::ColsAtCompileTime) == 1 ? 0
+ : int(Derived::RowsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? inner
+ : outer;
+}
+
/** Short version: don't use this function, use
* \link operator()(int,int) const \endlink instead.
*
@@ -48,6 +66,14 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::CoeffReturnType DenseBase
return derived().coeff(row, col);
}
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::CoeffReturnType DenseBase<Derived>
+ ::coeffByOuterInner(int outer, int inner) const
+{
+ return coeff(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+}
+
/** \returns the coefficient at given the given row and column.
*
* \sa operator()(int,int), operator[](int) const
@@ -84,6 +110,14 @@ EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& DenseBase<Derived>
return derived().coeffRef(row, col);
}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& DenseBase<Derived>
+ ::coeffRefByOuterInner(int outer, int inner)
+{
+ return coeffRef(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+}
+
/** \returns a reference to the coefficient at given the given row and column.
*
* \sa operator()(int,int) const, operator[](int)
@@ -261,6 +295,15 @@ DenseBase<Derived>::packet(int row, int col) const
return derived().template packet<LoadMode>(row,col);
}
+template<typename Derived>
+template<int LoadMode>
+EIGEN_STRONG_INLINE typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
+DenseBase<Derived>::packetByOuterInner(int outer, int inner) const
+{
+ return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+}
+
/** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
@@ -279,6 +322,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::writePacket
derived().template writePacket<StoreMode>(row,col,x);
}
+template<typename Derived>
+template<int StoreMode>
+EIGEN_STRONG_INLINE void DenseBase<Derived>::writePacketByOuterInner
+(int outer, int inner, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
+{
+ writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner),
+ x);
+}
+
/** \returns the packet of coefficients starting at the given index. It is your responsibility
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
@@ -346,6 +399,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyCoeff(int index, const DenseBas
derived().coeffRef(index) = other.derived().coeff(index);
}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE void DenseBase<Derived>::copyCoeffByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other)
+{
+ const int row = Derived::rowIndexByOuterInner(outer,inner);
+ const int col = Derived::colIndexByOuterInner(outer,inner);
+ // derived() is important here: copyCoeff() may be reimplemented in Derived!
+ derived().copyCoeff(row, col, other);
+}
+
/** \internal Copies the packet at position (row,col) of other into *this.
*
* This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
@@ -379,6 +442,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacket(int index, const DenseBa
other.derived().template packet<LoadMode>(index));
}
+template<typename Derived>
+template<typename OtherDerived, int StoreMode, int LoadMode>
+EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacketByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other)
+{
+ const int row = Derived::rowIndexByOuterInner(outer,inner);
+ const int col = Derived::colIndexByOuterInner(outer,inner);
+ // derived() is important here: copyPacket() may be reimplemented in Derived!
+ derived().template copyPacket<OtherDerived, StoreMode, LoadMode>(row, col, other);
+}
+
template<typename Derived, bool JustReturnZero>
struct ei_first_aligned_impl
{
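
// A standalone sketch (not part of the patch) of the (outer, inner) -> (row, col)
// convention that rowIndexByOuterInner()/colIndexByOuterInner() implement for the
// non-vector case: outer walks the storage-major dimension, inner the minor one.
// The helper names below are hypothetical.
#include <cassert>

inline int rowByOuterInner(bool rowMajor, int outer, int inner)
{ return rowMajor ? outer : inner; }

inline int colByOuterInner(bool rowMajor, int outer, int inner)
{ return rowMajor ? inner : outer; }

int main()
{
  // Column-major: outer indexes the column, inner indexes the row.
  assert(rowByOuterInner(false, 2, 1) == 1);
  assert(colByOuterInner(false, 2, 1) == 2);
  // Row-major: the roles swap.
  assert(rowByOuterInner(true, 2, 1) == 2);
  assert(colByOuterInner(true, 2, 1) == 1);
  return 0;
}
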
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index eb018738f..67540bd8c 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -124,7 +124,14 @@ template<typename Derived> class DenseBase
* constructed from this one. See the \ref flags "list of flags".
*/
- IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression is row major. */
+ IsRowMajor = RowsAtCompileTime==1 ? 1
+ : ColsAtCompileTime==1 ? 0
+ : int(Flags) & RowMajorBit, /**< True if this expression has row-major effective addressing.
+ For non-vectors, it is like reading the RowMajorBit on the Flags. For vectors, this is
+ overriden by the convention that row-vectors are row-major and column-vectors are column-major. */
+
+ InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime
+ : int(Flags)&RowMajorBit ? ColsAtCompileTime : RowsAtCompileTime,
CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
/**< This is a rough measure of how expensive it is to read one coefficient from
@@ -160,31 +167,98 @@ template<typename Derived> class DenseBase
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode
* \sa rows(), cols(), IsVectorAtCompileTime. */
- inline bool isVector() const { return rows()==1 || cols()==1; }
- /** \returns the size of the storage major dimension,
- * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */
- int outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); }
- /** \returns the size of the inner dimension according to the storage order,
- * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */
- int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }
-
- /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
- * Matrix::resize(). The present method only asserts that the new size equals the old size, and does
+
+ /** \returns the outer size.
+ *
+ * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
+ * with respect to the storage order, i.e., the number of columns for a column-major matrix,
+ * and the number of rows for a row-major matrix. */
+ int outerSize() const
+ {
+ return IsVectorAtCompileTime ? 1
+ : (int(Flags)&RowMajorBit) ? this->rows() : this->cols();
+ }
+
+ /** \returns the inner size.
+ *
+ * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
+ * with respect to the storage order, i.e., the number of rows for a column-major matrix,
+ * and the number of columns for a row-major matrix. */
+ int innerSize() const
+ {
+ return IsVectorAtCompileTime ? this->size()
+ : (int(Flags)&RowMajorBit) ? this->cols() : this->rows();
+ }
+
+ /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+ * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
* nothing else.
*/
void resize(int size)
{
ei_assert(size == this->size()
- && "MatrixBase::resize() does not actually allow to resize.");
+ && "DenseBase::resize() does not actually allow to resize.");
}
- /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
- * Matrix::resize(). The present method only asserts that the new size equals the old size, and does
+ /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+ * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
* nothing else.
*/
void resize(int rows, int cols)
{
ei_assert(rows == this->rows() && cols == this->cols()
- && "MatrixBase::resize() does not actually allow to resize.");
+ && "DenseBase::resize() does not actually allow to resize.");
+ }
+
+ /** \returns the pointer increment between two consecutive elements.
+ *
+ * \note For vectors, the storage order is ignored. For matrices (non-vectors), we're looking
+ * at the increment between two consecutive elements within a slice in the inner direction.
+ *
+ * \sa outerStride(), rowStride(), colStride()
+ */
+ inline int innerStride() const
+ {
+ EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+ return derived().innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
+ * in a column-major matrix).
+ *
+ * \note For vectors, the storage order is ignored, there is only one inner slice, and so this method returns 1.
+ * For matrices (non-vectors), the notion of inner slice depends on the storage order.
+ *
+ * \sa innerStride(), rowStride(), colStride()
+ */
+ inline int outerStride() const
+ {
+ EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+ return derived().outerStride();
+ }
+
+ inline int stride() const
+ {
+ return IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive rows.
+ *
+ * \sa innerStride(), outerStride(), colStride()
+ */
+ inline int rowStride() const
+ {
+ return IsRowMajor ? outerStride() : innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive columns.
+ *
+ * \sa innerStride(), outerStride(), rowStride()
+ */
+ inline int colStride() const
+ {
+ return IsRowMajor ? innerStride() : outerStride();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -242,9 +316,11 @@ template<typename Derived> class DenseBase
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
const CoeffReturnType coeff(int row, int col) const;
+ const CoeffReturnType coeffByOuterInner(int outer, int inner) const;
const CoeffReturnType operator()(int row, int col) const;
Scalar& coeffRef(int row, int col);
+ Scalar& coeffRefByOuterInner(int outer, int inner);
Scalar& operator()(int row, int col);
const CoeffReturnType coeff(int index) const;
@@ -259,17 +335,30 @@ template<typename Derived> class DenseBase
template<typename OtherDerived>
void copyCoeff(int row, int col, const DenseBase<OtherDerived>& other);
template<typename OtherDerived>
+ void copyCoeffByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other);
+ template<typename OtherDerived>
void copyCoeff(int index, const DenseBase<OtherDerived>& other);
template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(int row, int col, const DenseBase<OtherDerived>& other);
template<typename OtherDerived, int StoreMode, int LoadMode>
+ void copyPacketByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other);
+ template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(int index, const DenseBase<OtherDerived>& other);
+
+ private:
+ static int rowIndexByOuterInner(int outer, int inner);
+ static int colIndexByOuterInner(int outer, int inner);
+ public:
#endif // not EIGEN_PARSED_BY_DOXYGEN
template<int LoadMode>
PacketScalar packet(int row, int col) const;
+ template<int LoadMode>
+ PacketScalar packetByOuterInner(int outer, int inner) const;
template<int StoreMode>
void writePacket(int row, int col, const PacketScalar& x);
+ template<int StoreMode>
+ void writePacketByOuterInner(int outer, int inner, const PacketScalar& x);
template<int LoadMode>
PacketScalar packet(int index) const;
@@ -409,13 +498,6 @@ template<typename Derived> class DenseBase
template<typename OtherDerived>
void swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other);
- /** \returns number of elements to skip to pass from one row (resp. column) to another
- * for a row-major (resp. column-major) matrix.
- * Combined with coeffRef() and the \ref flags flags, it allows a direct access to the data
- * of the underlying matrix.
- */
- inline int stride() const { return derived().stride(); }
-
inline const NestByValue<Derived> nestByValue() const;
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
inline ForceAlignedAccess<Derived> forceAlignedAccess();
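
// A sketch (not part of the patch) of the four stride accessors that replace the
// removed all-purpose stride(), shown on a plain column-major matrix.
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(3, 7);                    // column-major storage
  std::cout << m.innerStride() << std::endl;  // 1: consecutive elements of a column
  std::cout << m.outerStride() << std::endl;  // 3: jump from one column to the next
  std::cout << m.rowStride()   << std::endl;  // 1: equals innerStride() here
  std::cout << m.colStride()   << std::endl;  // 3: equals outerStride() here
  return 0;
}
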
diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h
index 6245b8007..dac2142a4 100644
--- a/Eigen/src/Core/DenseStorageBase.h
+++ b/Eigen/src/Core/DenseStorageBase.h
@@ -75,23 +75,6 @@ class DenseStorageBase : public _Base<Derived>
EIGEN_STRONG_INLINE int rows() const { return m_storage.rows(); }
EIGEN_STRONG_INLINE int cols() const { return m_storage.cols(); }
- /** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data().
- *
- * More precisely:
- * - for a column major matrix it returns the number of elements between two successive columns
- * - for a row major matrix it returns the number of elements between two successive rows
- * - for a vector it returns the number of elements between two successive coefficients
- * This function has to be used together with the MapBase::data() function.
- *
- * \sa data() */
- EIGEN_STRONG_INLINE int stride() const
- {
- if(IsVectorAtCompileTime)
- return 1;
- else
- return (Flags & RowMajorBit) ? m_storage.cols() : m_storage.rows();
- }
-
EIGEN_STRONG_INLINE const Scalar& coeff(int row, int col) const
{
if(Flags & RowMajorBit)
@@ -253,12 +236,12 @@ class DenseStorageBase : public _Base<Derived>
{
if(RowsAtCompileTime == 1)
{
- ei_assert(other.isVector());
+ ei_assert(other.rows() == 1 || other.cols() == 1);
resize(1, other.size());
}
else if(ColsAtCompileTime == 1)
{
- ei_assert(other.isVector());
+ ei_assert(other.rows() == 1 || other.cols() == 1);
resize(other.size(), 1);
}
else resize(other.rows(), other.cols());
@@ -379,6 +362,9 @@ class DenseStorageBase : public _Base<Derived>
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
+ * These methods do not allow specifying strides. If you need to specify strides, you have to
+ * use the Map class directly.
+ *
* \see class Map
*/
//@{
@@ -544,11 +530,21 @@ struct ei_conservative_resize_like_impl
{
if (_this.rows() == rows && _this.cols() == cols) return;
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
- typename Derived::PlainObject tmp(rows,cols);
- const int common_rows = std::min(rows, _this.rows());
- const int common_cols = std::min(cols, _this.cols());
- tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
- _this.derived().swap(tmp);
+
+ if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
+ (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
+ {
+ _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
+ }
+ else
+ {
+ // The storage order does not allow us to use reallocation.
+ typename Derived::PlainObject tmp(rows,cols);
+ const int common_rows = std::min(rows, _this.rows());
+ const int common_cols = std::min(cols, _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
}
static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
@@ -563,11 +559,26 @@ struct ei_conservative_resize_like_impl
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
- typename Derived::PlainObject tmp(other);
- const int common_rows = std::min(tmp.rows(), _this.rows());
- const int common_cols = std::min(tmp.cols(), _this.cols());
- tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
- _this.derived().swap(tmp);
+ if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows
+ (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns
+ {
+ const int new_rows = other.rows() - _this.rows();
+ const int new_cols = other.cols() - _this.cols();
+ _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
+ if (new_rows>0)
+ _this.corner(BottomRight, new_rows, other.cols()) = other.corner(BottomRight, new_rows, other.cols());
+ else if (new_cols>0)
+ _this.corner(BottomRight, other.rows(), new_cols) = other.corner(BottomRight, other.rows(), new_cols);
+ }
+ else
+ {
+ // The storage order does not allow us to use reallocation.
+ typename Derived::PlainObject tmp(other);
+ const int common_rows = std::min(tmp.rows(), _this.rows());
+ const int common_cols = std::min(tmp.cols(), _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
}
};
@@ -576,22 +587,23 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
{
static void run(DenseBase<Derived>& _this, int size)
{
- if (_this.size() == size) return;
- typename Derived::PlainObject tmp(size);
- const int common_size = std::min<int>(_this.size(),size);
- tmp.segment(0,common_size) = _this.segment(0,common_size);
- _this.derived().swap(tmp);
+ const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
+ const int new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
+ _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
}
static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
{
if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
- // segment(...) will check whether Derived/OtherDerived are vectors!
- typename Derived::PlainObject tmp(other);
- const int common_size = std::min<int>(_this.size(),tmp.size());
- tmp.segment(0,common_size) = _this.segment(0,common_size);
- _this.derived().swap(tmp);
+ const int num_new_elements = other.size() - _this.size();
+
+ const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
+ const int new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
+ _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
+
+ if (num_new_elements > 0)
+ _this.tail(num_new_elements) = other.tail(num_new_elements);
}
};
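
// A sketch (not part of the patch) of the case the rewritten implementation now
// handles without a temporary: growing a column-major matrix along its outer
// dimension goes through the new in-place m_storage.conservativeResize() path.
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2, 3);
  m << 1, 2, 3,
       4, 5, 6;
  m.conservativeResize(2, 5);  // row count unchanged: realloc, no temporary copy
  m.col(3).setZero();          // new coefficients are left uninitialized
  m.col(4).setZero();
  std::cout << m << std::endl; // the original 2x3 block is preserved
  return 0;
}
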
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 201bd23ca..fbdc67bd3 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -25,224 +25,35 @@
#ifndef EIGEN_DOT_H
#define EIGEN_DOT_H
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for vectorization and unrolling
-***************************************************************************/
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_traits
-{
-public:
- enum {
- Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
- && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
- ? LinearVectorizedTraversal
- : DefaultTraversal
- };
-
-private:
- typedef typename Derived1::Scalar Scalar;
- enum {
- PacketSize = ei_packet_traits<Scalar>::size,
- Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
- + (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
- };
-
-public:
- enum {
- Unrolling = Cost <= UnrollingLimit
- ? CompleteUnrolling
- : NoUnrolling
- };
-};
-
-/***************************************************************************
-* Part 2 : unrollers
-***************************************************************************/
-
-/*** no vectorization ***/
-
-template<typename Derived1, typename Derived2, int Start, int Length>
-struct ei_dot_novec_unroller
-{
- enum {
- HalfLength = Length/2
- };
-
- typedef typename Derived1::Scalar Scalar;
-
- inline static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_dot_novec_unroller<Derived1, Derived2, Start, HalfLength>::run(v1, v2)
- + ei_dot_novec_unroller<Derived1, Derived2, Start+HalfLength, Length-HalfLength>::run(v1, v2);
- }
-};
-
-template<typename Derived1, typename Derived2, int Start>
-struct ei_dot_novec_unroller<Derived1, Derived2, Start, 1>
-{
- typedef typename Derived1::Scalar Scalar;
-
- inline static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_conj(v1.coeff(Start)) * v2.coeff(Start);
- }
-};
-
-/*** vectorization ***/
-
-template<typename Derived1, typename Derived2, int Index, int Stop,
- bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived1::Scalar>::size)>
-struct ei_dot_vec_unroller
-{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- enum {
- row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
- col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
- row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
- col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0
- };
-
- inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_pmadd(
- v1.template packet<Aligned>(row1, col1),
- v2.template packet<Aligned>(row2, col2),
- ei_dot_vec_unroller<Derived1, Derived2, Index+ei_packet_traits<Scalar>::size, Stop>::run(v1, v2)
- );
- }
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
-{
- enum {
- row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
- col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
- row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
- col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0,
- alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
- alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
- };
-
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2));
- }
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-template<typename Derived1, typename Derived2,
- int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
- int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
+// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
+// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
+// looking at the static assertions. Thus this is a trick to get better compile errors.
+template<typename T, typename U,
+// the NeedToTranspose condition here is taken straight from Assign.h
+ bool NeedToTranspose = T::IsVectorAtCompileTime
+ && U::IsVectorAtCompileTime
+ && ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
>
-struct ei_dot_impl;
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
+struct ei_dot_nocheck
{
- typedef typename Derived1::Scalar Scalar;
- static Scalar run(const Derived1& v1, const Derived2& v2)
+ static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
- ei_assert(v1.size()>0 && "you are using a non initialized vector");
- Scalar res;
- res = ei_conj(v1.coeff(0)) * v2.coeff(0);
- for(int i = 1; i < v1.size(); ++i)
- res += ei_conj(v1.coeff(i)) * v2.coeff(i);
- return res;
+ return a.conjugate().cwiseProduct(b).sum();
}
};
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
- : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
-{};
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
-{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- const int size = v1.size();
- const int packetSize = ei_packet_traits<Scalar>::size;
- const int alignedSize = (size/packetSize)*packetSize;
- enum {
- alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
- alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
- };
- Scalar res;
-
- // do the vectorizable part of the sum
- if(size >= packetSize)
- {
- PacketScalar packet_res = ei_pmul(
- v1.template packet<alignment1>(0),
- v2.template packet<alignment2>(0)
- );
- for(int index = packetSize; index<alignedSize; index += packetSize)
- {
- packet_res = ei_pmadd(
- v1.template packet<alignment1>(index),
- v2.template packet<alignment2>(index),
- packet_res
- );
- }
- res = ei_predux(packet_res);
-
- // now we must do the rest without vectorization.
- if(alignedSize == size) return res;
- }
- else // too small to vectorize anything.
- // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
- {
- res = Scalar(0);
- }
-
- // do the remainder of the vector
- for(int index = alignedSize; index < size; ++index)
- {
- res += v1.coeff(index) * v2.coeff(index);
- }
-
- return res;
- }
-};
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
+template<typename T, typename U>
+struct ei_dot_nocheck<T, U, true>
{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
- enum {
- PacketSize = ei_packet_traits<Scalar>::size,
- Size = Derived1::SizeAtCompileTime,
- VectorizedSize = (Size / PacketSize) * PacketSize
- };
- static Scalar run(const Derived1& v1, const Derived2& v2)
+ static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
- Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
- if (VectorizedSize != Size)
- res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
- return res;
+ return a.adjoint().cwiseProduct(b).sum();
}
};
-/***************************************************************************
-* Part 4 : implementation of MatrixBase methods
-***************************************************************************/
-
/** \returns the dot product of *this with other.
*
* \only_for_vectors
@@ -266,10 +77,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
ei_assert(size() == other.size());
- // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) )
- typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
- typedef typename ei_cleantype<typename OtherDerived::Nested>::type OtherNested;
- return ei_dot_impl<ThisNested, OtherNested>::run(derived(), other.derived());
+ return ei_dot_nocheck<Derived,OtherDerived>::run(*this, other);
}
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
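
// A minimal sketch (not part of this patch) of the convention the new
// ei_dot_nocheck helpers implement: dot() conjugates its first operand, so for
// complex vectors a.dot(b) equals a.conjugate().cwiseProduct(b).sum().
#include <Eigen/Core>
#include <complex>
#include <iostream>
int main()
{
  Eigen::Vector2cf a, b;
  a << std::complex<float>(1,2), std::complex<float>(0,1);
  b << std::complex<float>(3,0), std::complex<float>(1,1);
  std::cout << a.dot(b) << "\n";                            // sum of conj(a_i) * b_i
  std::cout << a.conjugate().cwiseProduct(b).sum() << "\n"; // same value
  return 0;
}
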
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 0044fe7cb..9413b74fa 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -60,7 +60,8 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
- inline int stride() const { return m_matrix.stride(); }
+ inline int outerStride() const { return m_matrix.outerStride(); }
+ inline int innerStride() const { return m_matrix.innerStride(); }
inline const Scalar coeff(int row, int col) const
{
diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h
index 927f43413..300b22329 100644
--- a/Eigen/src/Core/ForceAlignedAccess.h
+++ b/Eigen/src/Core/ForceAlignedAccess.h
@@ -52,7 +52,8 @@ template<typename ExpressionType> class ForceAlignedAccess
inline int rows() const { return m_expression.rows(); }
inline int cols() const { return m_expression.cols(); }
- inline int stride() const { return m_expression.stride(); }
+ inline int outerStride() const { return m_expression.outerStride(); }
+ inline int innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(int row, int col) const
{
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 31d0cff70..c2b317cc0 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -179,7 +179,7 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
enum {
Cost = 2 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::size>1
- #if (defined EIGEN_VECTORIZE_SSE)
+ #if (defined EIGEN_VECTORIZE)
&& NumTraits<Scalar>::HasFloatingPoint
#endif
};
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 83688dbca..6e9a5439e 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
@@ -33,10 +33,35 @@
* \param MatrixType the equivalent matrix type of the mapped data
* \param Options specifies whether the pointer is \c Aligned, or \c Unaligned.
* The default is \c Unaligned.
+ * \param StrideType optionally specifies strides. By default, Map assumes the memory layout
+ * of an ordinary, contiguous array. This can be overridden by specifying strides.
+ * The type passed here must be a specialization of the Stride template, see examples below.
*
* This class represents a matrix or vector expression mapping an existing array of data.
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
- * such as plain C arrays or structures from other libraries.
+ * such as plain C arrays or structures from other libraries. By default, it assumes that the
+ * data is laid out contiguously in memory. You can however override this by explicitly specifying
+ * inner and outer strides.
+ *
+ * Here's an example of simply mapping a contiguous array as a column-major matrix:
+ * \include Map_simple.cpp
+ * Output: \verbinclude Map_simple.out
+ *
+ * If you need to map non-contiguous arrays, you can do so by specifying strides:
+ *
+ * Here's an example of mapping an array as a vector with an inner stride, that is, the pointer
+ * increment between two consecutive coefficients. In this example, the inner stride is a compile-time
+ * fixed value.
+ * \include Map_inner_stride.cpp
+ * Output: \verbinclude Map_inner_stride.out
+ *
+ * Here's an example of mapping an array while specifying an outer stride. Since we're mapping
+ * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
+ * In this example, the outer stride is given as a runtime parameter.
+ * \include Map_outer_stride.cpp
+ * Output: \verbinclude Map_outer_stride.out
+ *
+ * For more details and for an example of specifying both an inner and an outer stride, see class Stride.
*
* \b Tip: to change the array of data mapped by a Map object, you can use the C++
* placement new syntax:
@@ -48,48 +73,86 @@
*
* \sa Matrix::Map()
*/
-template<typename MatrixType, int Options>
-struct ei_traits<Map<MatrixType, Options> > : public ei_traits<MatrixType>
+template<typename MatrixType, int Options, typename StrideType>
+struct ei_traits<Map<MatrixType, Options, StrideType> >
+ : public ei_traits<MatrixType>
{
+ typedef typename MatrixType::Scalar Scalar;
enum {
- Flags = (Options&Aligned)==Aligned ? ei_traits<MatrixType>::Flags | AlignedBit
- : ei_traits<MatrixType>::Flags & ~AlignedBit
+ InnerStride = StrideType::InnerStrideAtCompileTime,
+ OuterStride = StrideType::OuterStrideAtCompileTime,
+ HasNoInnerStride = InnerStride <= 1,
+ HasNoOuterStride = OuterStride == 0,
+ HasNoStride = HasNoInnerStride && HasNoOuterStride,
+ IsAligned = int(int(Options)&Aligned)==Aligned,
+ IsDynamicSize = MatrixType::SizeAtCompileTime==Dynamic,
+ KeepsPacketAccess = bool(HasNoInnerStride)
+ && ( bool(IsDynamicSize)
+ || HasNoOuterStride
+ || ( OuterStride!=Dynamic && ((int(OuterStride)*sizeof(Scalar))%16)==0 ) ),
+ Flags0 = ei_traits<MatrixType>::Flags,
+ Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit,
+ Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+ Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
};
};
-template<typename MatrixType, int Options> class Map
- : public MapBase<Map<MatrixType, Options>,
- typename MatrixType::template MakeBase< Map<MatrixType, Options> >::Type>
+template<typename MatrixType, int Options, typename StrideType> class Map
+ : public MapBase<Map<MatrixType, Options, StrideType>,
+ typename MatrixType::template MakeBase<
+ Map<MatrixType, Options, StrideType>
+ >::Type>
{
public:
typedef MapBase<Map,typename MatrixType::template MakeBase<Map>::Type> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(Map)
-
- inline int stride() const { return this->innerSize(); }
- inline Map(const Scalar* data) : Base(data) {}
-
- inline Map(const Scalar* data, int size) : Base(data, size) {}
-
- inline Map(const Scalar* data, int rows, int cols) : Base(data, rows, cols) {}
+ EIGEN_DENSE_PUBLIC_INTERFACE(Map)
- inline void resize(int rows, int cols)
+ inline int innerStride() const
{
- EIGEN_ONLY_USED_FOR_DEBUG(rows);
- EIGEN_ONLY_USED_FOR_DEBUG(cols);
- ei_assert(rows == this->rows());
- ei_assert(cols == this->cols());
+ return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
}
- inline void resize(int size)
+ inline int outerStride() const
{
- EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType)
- EIGEN_ONLY_USED_FOR_DEBUG(size);
- ei_assert(size == this->size());
+ return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+ : IsVectorAtCompileTime ? this->size()
+ : int(Flags)&RowMajorBit ? this->cols()
+ : this->rows();
}
+ /** Constructor in the fixed-size case.
+ *
+ * \param data pointer to the array to map
+ * \param stride optional Stride object, passing the strides.
+ */
+ inline Map(const Scalar* data, const StrideType& stride = StrideType())
+ : Base(data), m_stride(stride) {}
+
+ /** Constructor in the dynamic-size vector case.
+ *
+ * \param data pointer to the array to map
+ * \param size the size of the vector expression
+ * \param stride optional Stride object, passing the strides.
+ */
+ inline Map(const Scalar* data, int size, const StrideType& stride = StrideType())
+ : Base(data, size), m_stride(stride) {}
+
+ /** Constructor in the dynamic-size matrix case.
+ *
+ * \param data pointer to the array to map
+ * \param rows the number of rows of the matrix expression
+ * \param cols the number of columns of the matrix expression
+ * \param stride optional Stride object, passing the strides.
+ */
+ inline Map(const Scalar* data, int rows, int cols, const StrideType& stride = StrideType())
+ : Base(data, rows, cols), m_stride(stride) {}
+
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+
+ protected:
+ StrideType m_stride;
};
template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
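
// A usage sketch (not part of this patch) of the new stride-aware Map
// constructors; buffer sizes and stride values are hypothetical.
#include <Eigen/Core>
using namespace Eigen;
void map_with_strides()
{
  float data[64];
  // contiguous mapping, as before:
  Map<MatrixXf> m1(data, 8, 8);
  // every other coefficient of the buffer, seen as a vector of size 8:
  Map<VectorXf, Unaligned, InnerStride<2> > m2(data, 8);
  // a 4x4 matrix whose columns start 16 floats apart:
  Map<MatrixXf, Unaligned, OuterStride<Dynamic> > m3(data, 4, 4, OuterStride<Dynamic>(16));
}
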
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index a922d8bb0..d735cfc47 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
@@ -37,9 +37,7 @@ template<typename Derived, typename Base> class MapBase
{
public:
-// typedef MatrixBase<Derived> Base;
enum {
- IsRowMajor = (int(ei_traits<Derived>::Flags) & RowMajorBit) ? 1 : 0,
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
SizeAtCompileTime = Base::SizeAtCompileTime
@@ -48,94 +46,75 @@ template<typename Derived, typename Base> class MapBase
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
using Base::derived;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
inline int rows() const { return m_rows.value(); }
inline int cols() const { return m_cols.value(); }
- /** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data().
- *
- * More precisely:
- * - for a column major matrix it returns the number of elements between two successive columns
- * - for a row major matrix it returns the number of elements between two successive rows
- * - for a vector it returns the number of elements between two successive coefficients
- * This function has to be used together with the MapBase::data() function.
- *
- * \sa MapBase::data() */
- inline int stride() const { return derived().stride(); }
-
/** Returns a pointer to the first coefficient of the matrix or vector.
- * This function has to be used together with the stride() function.
*
- * \sa MapBase::stride() */
+ * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
+ *
+ * \sa innerStride(), outerStride()
+ */
inline const Scalar* data() const { return m_data; }
inline const Scalar& coeff(int row, int col) const
{
- if(IsRowMajor)
- return m_data[col + row * stride()];
- else // column-major
- return m_data[row + col * stride()];
+ return m_data[col * colStride() + row * rowStride()];
}
inline Scalar& coeffRef(int row, int col)
{
- if(IsRowMajor)
- return const_cast<Scalar*>(m_data)[col + row * stride()];
- else // column-major
- return const_cast<Scalar*>(m_data)[row + col * stride()];
+ return const_cast<Scalar*>(m_data)[col * colStride() + row * rowStride()];
}
inline const Scalar& coeff(int index) const
{
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
- if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) )
- return m_data[index];
- else
- return m_data[index*stride()];
+ return m_data[index * innerStride()];
}
inline Scalar& coeffRef(int index)
{
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
- if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) )
- return const_cast<Scalar*>(m_data)[index];
- else
- return const_cast<Scalar*>(m_data)[index*stride()];
+ return const_cast<Scalar*>(m_data)[index * innerStride()];
}
template<int LoadMode>
inline PacketScalar packet(int row, int col) const
{
return ei_ploadt<Scalar, LoadMode>
- (m_data + (IsRowMajor ? col + row * stride()
- : row + col * stride()));
+ (m_data + (col * colStride() + row * rowStride()));
}
template<int LoadMode>
inline PacketScalar packet(int index) const
{
- return ei_ploadt<Scalar, LoadMode>(m_data + index);
+ return ei_ploadt<Scalar, LoadMode>(m_data + index * innerStride());
}
template<int StoreMode>
inline void writePacket(int row, int col, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
- (const_cast<Scalar*>(m_data) + (IsRowMajor ? col + row * stride()
- : row + col * stride()), x);
+ (const_cast<Scalar*>(m_data) + (col * colStride() + row * rowStride()), x);
}
template<int StoreMode>
inline void writePacket(int index, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode>
- (const_cast<Scalar*>(m_data) + index, x);
+ (const_cast<Scalar*>(m_data) + index * innerStride(), x);
}
inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- checkDataAlignment();
+ checkSanity();
}
inline MapBase(const Scalar* data, int size)
@@ -146,7 +125,7 @@ template<typename Derived, typename Base> class MapBase
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
ei_assert(size >= 0);
ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
- checkDataAlignment();
+ checkSanity();
}
inline MapBase(const Scalar* data, int rows, int cols)
@@ -155,7 +134,7 @@ template<typename Derived, typename Base> class MapBase
ei_assert( (data == 0)
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
- checkDataAlignment();
+ checkSanity();
}
Derived& operator=(const MapBase& other)
@@ -167,10 +146,12 @@ template<typename Derived, typename Base> class MapBase
protected:
- void checkDataAlignment() const
+ void checkSanity() const
{
ei_assert( ((!(ei_traits<Derived>::Flags&AlignedBit))
|| ((size_t(m_data)&0xf)==0)) && "data is not aligned");
+ ei_assert( ((!(ei_traits<Derived>::Flags&PacketAccessBit))
+ || (innerStride()==1)) && "packet access incompatible with inner stride greater than 1");
}
const Scalar* EIGEN_RESTRICT m_data;
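
// Illustrative restatement (not part of this patch) of the unified addressing
// used above: rowStride()/colStride() are provided by the base class (not shown
// here) and resolve to the inner or outer stride depending on storage order.
int coeff_offset(int row, int col, bool rowMajor, int innerStride, int outerStride)
{
  const int rowStride = rowMajor ? outerStride : innerStride;
  const int colStride = rowMajor ? innerStride : outerStride;
  return col * colStride + row * rowStride; // same formula as coeff()/coeffRef()
}
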
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 44a0ef7d1..e7422457c 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
@@ -318,6 +318,9 @@ class Matrix
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
{ this->_swap(other.derived()); }
+ inline int innerStride() const { return 1; }
+ inline int outerStride() const { return this->innerSize(); }
+
/////////// Geometry module ///////////
template<typename OtherDerived>
@@ -331,6 +334,9 @@ class Matrix
#endif
protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct ei_conservative_resize_like_impl;
+
using Base::m_storage;
};
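
// Quick illustration (not part of this patch) of the strides a plain Matrix now
// reports through the two accessors added above.
#include <Eigen/Core>
void plain_matrix_strides()
{
  Eigen::Matrix<float,3,4> m;   // column-major by default
  int inner = m.innerStride();  // 1: consecutive coefficients are adjacent
  int outer = m.outerStride();  // 3: columns are innerSize()==3 floats apart
  (void)inner; (void)outer;
}
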
diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h
index 046670452..ece603ffa 100644
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -3,6 +3,7 @@
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -49,6 +50,12 @@ struct ei_matrix_array
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
+// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly
+#ifdef __ARM_NEON__
+ #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #endif
+#endif
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
#else
@@ -92,6 +99,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matr
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); }
inline static int rows(void) {return _Rows;}
inline static int cols(void) {return _Cols;}
+ inline void conservativeResize(int,int,int) {}
inline void resize(int,int,int) {}
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
@@ -107,6 +115,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage
inline void swap(ei_matrix_storage& ) {}
inline static int rows(void) {return _Rows;}
inline static int cols(void) {return _Cols;}
+ inline void conservativeResize(int,int,int) {}
inline void resize(int,int,int) {}
inline const T *data() const { return 0; }
inline T *data() { return 0; }
@@ -127,11 +136,8 @@ template<typename T, int Size, int _Options> class ei_matrix_storage<T, Size, Dy
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return m_cols;}
- inline void resize(int, int rows, int cols)
- {
- m_rows = rows;
- m_cols = cols;
- }
+ inline void conservativeResize(int, int rows, int cols) { m_rows = rows; m_cols = cols; }
+ inline void resize(int, int rows, int cols) { m_rows = rows; m_cols = cols; }
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
};
@@ -149,10 +155,8 @@ template<typename T, int Size, int _Cols, int _Options> class ei_matrix_storage<
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return _Cols;}
- inline void resize(int /*size*/, int rows, int)
- {
- m_rows = rows;
- }
+ inline void conservativeResize(int, int rows, int) { m_rows = rows; }
+ inline void resize(int, int rows, int) { m_rows = rows; }
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
};
@@ -170,10 +174,8 @@ template<typename T, int Size, int _Rows, int _Options> class ei_matrix_storage<
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return _Rows;}
inline int cols(void) const {return m_cols;}
- inline void resize(int, int, int cols)
- {
- m_cols = cols;
- }
+ inline void conservativeResize(int, int, int cols) { m_cols = cols; }
+ inline void resize(int, int, int cols) { m_cols = cols; }
inline const T *data() const { return m_data.array; }
inline T *data() { return m_data.array; }
};
@@ -196,6 +198,12 @@ template<typename T, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic,
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return m_cols;}
+ inline void conservativeResize(int size, int rows, int cols)
+ {
+ m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
+ m_rows = rows;
+ m_cols = cols;
+ }
void resize(int size, int rows, int cols)
{
if(size != m_rows*m_cols)
@@ -228,6 +236,11 @@ template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline static int rows(void) {return _Rows;}
inline int cols(void) const {return m_cols;}
+ inline void conservativeResize(int size, int, int cols)
+ {
+ m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
+ m_cols = cols;
+ }
void resize(int size, int, int cols)
{
if(size != _Rows*m_cols)
@@ -259,6 +272,11 @@ template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline int rows(void) const {return m_rows;}
inline static int cols(void) {return _Cols;}
+ inline void conservativeResize(int size, int rows, int)
+ {
+ m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
+ m_rows = rows;
+ }
void resize(int size, int rows, int)
{
if(size != m_rows*_Cols)
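
// A usage sketch (not part of this patch) of what these conservativeResize()
// storage hooks enable at the API level; the sizes are illustrative.
#include <Eigen/Core>
void conservative_resize_demo()
{
  Eigen::MatrixXf m(2,2);
  m << 1, 2,
       3, 4;
  m.conservativeResize(2, 3); // the old 2x2 block is preserved;
                              // the new third column is left uninitialized
}
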
diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h
index 9f6d1c0c0..497ce828b 100644
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -53,7 +53,8 @@ template<typename ExpressionType> class NestByValue
inline int rows() const { return m_expression.rows(); }
inline int cols() const { return m_expression.cols(); }
- inline int stride() const { return m_expression.stride(); }
+ inline int outerStride() const { return m_expression.outerStride(); }
+ inline int innerStride() const { return m_expression.innerStride(); }
inline const CoeffReturnType coeff(int row, int col) const
{
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index fcd2e46cc..46884dc3f 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -47,7 +47,7 @@
* \sa class DiagonalMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix;
-template<typename PermutationType, typename MatrixType, int Side> struct ei_permut_matrix_product_retval;
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
@@ -132,6 +132,9 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
/** \returns the number of columns */
inline int cols() const { return m_indices.size(); }
+ /** \returns the size of a side of the respective square matrix, i.e., the number of indices */
+ inline int size() const { return m_indices.size(); }
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
@@ -213,16 +216,29 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
return *this;
}
- /**** inversion and multiplication helpers to hopefully get RVO ****/
+ /** \returns the inverse permutation matrix.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ inline Transpose<PermutationMatrix> inverse() const
+ { return *this; }
+ /** \returns the transpose permutation matrix.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ inline Transpose<PermutationMatrix> transpose() const
+ { return *this; }
+
+ /**** multiplication helpers to hopefully get RVO ****/
#ifndef EIGEN_PARSED_BY_DOXYGEN
- protected:
- enum Inverse_t {Inverse};
- PermutationMatrix(Inverse_t, const PermutationMatrix& other)
- : m_indices(other.m_indices.size())
+ template<int OtherSize, int OtherMaxSize>
+ PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other)
+ : m_indices(other.nestedPermutation().size())
{
- for (int i=0; i<rows();++i) m_indices.coeffRef(other.m_indices.coeff(i)) = i;
+ for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
}
+ protected:
enum Product_t {Product};
PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs)
: m_indices(lhs.m_indices.size())
@@ -233,12 +249,7 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
#endif
public:
- /** \returns the inverse permutation matrix.
- *
- * \note \note_try_to_help_rvo
- */
- inline PermutationMatrix inverse() const
- { return PermutationMatrix(Inverse, *this); }
+
/** \returns the product permutation matrix.
*
* \note \note_try_to_help_rvo
@@ -247,6 +258,22 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
{ return PermutationMatrix(Product, *this, other); }
+ /** \returns the product of this permutation with the inverse of another permutation.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ template<int OtherSize, int OtherMaxSize>
+ inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
+ { return PermutationMatrix(Product, *this, other.eval()); }
+
+ /** \returns the product of an inverse permutation with another permutation.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ template<int OtherSize, int OtherMaxSize> friend
+ inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
+ { return PermutationMatrix(Product, other.eval(), perm); }
+
protected:
IndicesType m_indices;
@@ -277,15 +304,15 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm
(permutation, matrix.derived());
}
-template<typename PermutationType, typename MatrixType, int Side>
-struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename MatrixType::PlainObject ReturnType;
};
-template<typename PermutationType, typename MatrixType, int Side>
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct ei_permut_matrix_product_retval
- : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+ : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
@@ -299,21 +326,46 @@ struct ei_permut_matrix_product_retval
template<typename Dest> inline void evalTo(Dest& dst) const
{
const int n = Side==OnTheLeft ? rows() : cols();
- for(int i = 0; i < n; ++i)
+
+ if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))
{
- Block<
- Dest,
- Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime,
- Side==OnTheRight ? 1 : Dest::ColsAtCompileTime
- >(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i)
-
- =
-
- Block<
- MatrixTypeNestedCleaned,
- Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,
- Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime
- >(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i);
+ // apply the permutation inplace
+ Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
+ mask.fill(false);
+ int r = 0;
+ while(r < m_permutation.size())
+ {
+ // search for the next seed
+ while(r<m_permutation.size() && mask[r]) r++;
+ if(r>=m_permutation.size())
+ break;
+ // we got one, let's follow it until we are back to the seed
+ int k0 = r++;
+ int kPrev = k0;
+ mask.coeffRef(k0) = true;
+ for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+ .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+
+ mask.coeffRef(k) = true;
+ kPrev = k;
+ }
+ }
+ }
+ else
+ {
+ for(int i = 0; i < n; ++i)
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
+
+ =
+
+ Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
+ (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+ }
}
}
@@ -322,4 +374,78 @@ struct ei_permut_matrix_product_retval
const typename MatrixType::Nested m_matrix;
};
+/* Template partial specialization for transposed/inverse permutations */
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+ : ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{};
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
+ : public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+{
+ typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType;
+ typedef typename PermutationType::IndicesType IndicesType;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef ei_traits<PermutationType> Traits;
+ typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
+ DenseMatrixType;
+ enum {
+ Flags = Traits::Flags,
+ CoeffReadCost = Traits::CoeffReadCost,
+ RowsAtCompileTime = Traits::RowsAtCompileTime,
+ ColsAtCompileTime = Traits::ColsAtCompileTime,
+ MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+ };
+ typedef typename Traits::Scalar Scalar;
+ #endif
+
+ Transpose(const PermutationType& p) : m_permutation(p) {}
+
+ inline int rows() const { return m_permutation.rows(); }
+ inline int cols() const { return m_permutation.cols(); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename DenseDerived>
+ void evalTo(MatrixBase<DenseDerived>& other) const
+ {
+ other.setZero();
+ for (int i=0; i<rows();++i)
+ other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+ }
+ #endif
+
+ /** \return the equivalent permutation matrix */
+ PermutationType eval() const { return *this; }
+
+ DenseMatrixType toDenseMatrix() const { return *this; }
+
+ /** \returns the matrix with the inverse permutation applied to the columns.
+ */
+ template<typename Derived> friend
+ inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>
+ operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm)
+ {
+ return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+ }
+
+ /** \returns the matrix with the inverse permutation applied to the rows.
+ */
+ template<typename Derived>
+ inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>
+ operator*(const MatrixBase<Derived>& matrix) const
+ {
+ return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived());
+ }
+
+ const PermutationType& nestedPermutation() const { return m_permutation; }
+
+ protected:
+ const PermutationType& m_permutation;
+};
+
#endif // EIGEN_PERMUTATIONMATRIX_H
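
// Two sketches (not part of this patch) of the functionality added above; the
// permutation values are hypothetical and assume the usual indices() accessor.
#include <Eigen/Core>
#include <algorithm>
#include <vector>
using namespace Eigen;

// (1) inverse() and transpose() now return a lightweight Transpose<PermutationMatrix>:
void permutation_inverse_demo()
{
  PermutationMatrix<3> P;
  P.indices() << 2, 0, 1;
  Matrix3f A = Matrix3f::Random();
  Matrix3f B = P * A;           // permute the rows of A
  Matrix3f C = P.inverse() * B; // C == A, via the Transposed product path
  (void)C;
}

// (2) Standalone restatement of the in-place branch of evalTo() above: when
// source and destination alias, each permutation cycle is walked once, swapping
// as we go, with a mask marking the already-visited positions.
void apply_permutation_in_place(std::vector<float>& v, const std::vector<int>& perm)
{
  std::vector<bool> done(perm.size(), false);
  for (std::size_t seed = 0; seed < perm.size(); ++seed)
  {
    if (done[seed]) continue;   // already handled as part of an earlier cycle
    done[seed] = true;
    for (std::size_t k = perm[seed]; k != seed; k = perm[k])
    {
      std::swap(v[seed], v[k]); // rotate the cycle one step
      done[k] = true;
    }
  }
  // afterwards, the element initially at position i sits at position perm[i]
}
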
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index fe6d29c7d..865387b11 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -68,9 +68,9 @@ template<typename Lhs, typename Rhs> struct ei_product_type
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
- rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small),
- cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small),
- depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small)
+ rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small),
+ cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small),
+ depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small)
};
typedef ei_product_type_selector<rows_select, cols_select, depth_select> product_type_selector;
@@ -93,12 +93,12 @@ template<> struct ei_product_type_selector<Small,Small,Small>
template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
-template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; };
+template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
-template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; };
-template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = GemvProduct }; };
+template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
-template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = GemvProduct }; };
+template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct ei_product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
@@ -427,6 +427,10 @@ template<typename OtherDerived>
inline const typename ProductReturnType<Derived,OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
+ // A note regarding the function declaration: in MSVC, this function will sometimes
+ // not be inlined, because ei_matrix_storage is an unwindable object for dynamic
+ // matrices and product types hold a member to store the result.
+ // Thus, tagging this function with EIGEN_STRONG_INLINE does not help.
enum {
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|| OtherDerived::RowsAtCompileTime==Dynamic
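
// Illustrative restatement (not part of this patch) of how the selector above
// classifies each extent before picking a product implementation:
int classify_extent(int n) // n is a compile-time extent in the real code
{
  const int threshold = 8;       // default EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
  if (n >= threshold) return 2;  // Large
  return (n == 1) ? 1 : 0;       // exactly one, resp. Small
}
// The specializations changed above make matrix-vector products whose other
// extents are Small use the coefficient-based path rather than the GEMV kernel.
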
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index d5b0c60c2..75297dcb9 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -40,7 +40,7 @@ struct ei_redux_traits
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
- InnerMaxSize = int(Derived::Flags)&RowMajorBit
+ InnerMaxSize = int(Derived::IsRowMajor)
? Derived::MaxColsAtCompileTime
: Derived::MaxRowsAtCompileTime
};
@@ -100,15 +100,15 @@ template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 1>
{
enum {
- col = Start / Derived::RowsAtCompileTime,
- row = Start % Derived::RowsAtCompileTime
+ outer = Start / Derived::InnerSizeAtCompileTime,
+ inner = Start % Derived::InnerSizeAtCompileTime
};
typedef typename Derived::Scalar Scalar;
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
{
- return mat.coeff(row, col);
+ return mat.coeffByOuterInner(outer, inner);
}
};
@@ -148,12 +148,8 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
- row = int(Derived::Flags)&RowMajorBit
- ? index / int(Derived::ColsAtCompileTime)
- : index % Derived::RowsAtCompileTime,
- col = int(Derived::Flags)&RowMajorBit
- ? index % int(Derived::ColsAtCompileTime)
- : index / Derived::RowsAtCompileTime,
+ outer = index / int(Derived::InnerSizeAtCompileTime),
+ inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
@@ -162,7 +158,7 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
{
- return mat.template packet<alignment>(row, col);
+ return mat.template packetByOuterInner<alignment>(outer, inner);
}
};
@@ -184,12 +180,12 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
- res = mat.coeff(0, 0);
- for(int i = 1; i < mat.rows(); ++i)
- res = func(res, mat.coeff(i, 0));
- for(int j = 1; j < mat.cols(); ++j)
- for(int i = 0; i < mat.rows(); ++i)
- res = func(res, mat.coeff(i, j));
+ res = mat.coeffByOuterInner(0, 0);
+ for(int i = 1; i < mat.innerSize(); ++i)
+ res = func(res, mat.coeffByOuterInner(0, i));
+ for(int i = 1; i < mat.outerSize(); ++i)
+ for(int j = 0; j < mat.innerSize(); ++j)
+ res = func(res, mat.coeffByOuterInner(i, j));
return res;
}
};
@@ -253,8 +249,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
const int innerSize = mat.innerSize();
const int outerSize = mat.outerSize();
enum {
- packetSize = ei_packet_traits<Scalar>::size,
- isRowMajor = Derived::Flags&RowMajorBit?1:0
+ packetSize = ei_packet_traits<Scalar>::size
};
const int packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res;
@@ -263,13 +258,12 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
for(int j=0; j<outerSize; ++j)
for(int i=0; i<packetedInnerSize; i+=int(packetSize))
- packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
- (isRowMajor?j:i, isRowMajor?i:j));
+ packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
res = func.predux(packet_res);
for(int j=0; j<outerSize; ++j)
for(int i=packetedInnerSize; i<innerSize; ++i)
- res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
+ res = func(res, mat.coeffByOuterInner(j,i));
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
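
// Illustrative mapping (not part of this patch) behind the coeffByOuterInner()
// interface used above: a visiting position is an (outer, inner) pair, which the
// expression's storage order turns into (row, col).
void outer_inner_to_row_col(int outer, int inner, bool rowMajor, int& row, int& col)
{
  row = rowMajor ? outer : inner;
  col = rowMajor ? inner : outer;
}
// This lets the redux loops traverse any expression in its own storage order,
// without the explicit row-major/column-major branches the old code carried.
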
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index d375f0b5c..160b973bd 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -34,14 +34,9 @@ struct ei_traits<ReturnByValue<Derived> >
: public ei_traits<typename ei_traits<Derived>::ReturnType>
{
enum {
- // FIXME had to remove the DirectAccessBit for usage like
- // matrix.inverse().block(...)
- // because the Block ctor with direct access
- // wants to call coeffRef() to get an address, and that fails (infinite recursion) as ReturnByValue
- // doesnt implement coeffRef().
- // The fact that I had to do that shows that when doing xpr.block() with a non-direct-access xpr,
- // even if xpr has the EvalBeforeNestingBit, the block() doesn't use direct access on the evaluated
- // xpr.
+ // We're disabling DirectAccess because e.g. the constructor of
+ // the Block-with-DirectAccess expression requires a coeffRef() method.
+ // Also, we don't want to have to implement the stride-related methods.
Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags
| EvalBeforeNestingBit) & ~DirectAccessBit
};
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index c3c5b17ff..0b57b9968 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -75,8 +75,9 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
- inline int stride() const { return m_matrix.stride(); }
-
+ inline int outerStride() const { return m_matrix.outerStride(); }
+ inline int innerStride() const { return m_matrix.innerStride(); }
+
/** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part
*/
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index d2690b66b..529a9994d 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -57,7 +57,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
- inline int stride() const { return m_matrix.stride(); }
+ inline int outerStride() const { return m_matrix.outerStride(); }
+ inline int innerStride() const { return m_matrix.innerStride(); }
inline const Scalar* data() const { return m_matrix.data(); }
// note that this function is needed by assign to correctly align loads/stores
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
new file mode 100644
index 000000000..d960dd2fc
--- /dev/null
+++ b/Eigen/src/Core/Stride.h
@@ -0,0 +1,113 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_STRIDE_H
+#define EIGEN_STRIDE_H
+
+/** \class Stride
+ *
+ * \brief Holds stride information for Map
+ *
+ * This class holds the stride information needed to map arrays with strides using class Map.
+ *
+ * It holds two values: the inner stride and the outer stride.
+ *
+ * The inner stride is the pointer increment between two consecutive entries within a given row of a
+ * row-major matrix or within a given column of a column-major matrix.
+ *
+ * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or
+ * between two consecutive columns of a column-major matrix.
+ *
+ * These two values can be passed either at compile-time as template parameters, or at runtime as
+ * arguments to the constructor.
+ *
+ * To this end, this class takes two template parameters:
+ * \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
+ * \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
+ *
+ * \include Map_general_stride.cpp
+ * Output: \verbinclude Map_general_stride.out
+ *
+ * \sa class InnerStride, class OuterStride
+ */
+template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
+class Stride
+{
+ public:
+
+ enum {
+ InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
+ OuterStrideAtCompileTime = _OuterStrideAtCompileTime
+ };
+
+ /** Default constructor, for use when strides are fixed at compile time */
+ Stride()
+ : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
+ {
+ ei_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
+ }
+
+ /** Constructor allowing the strides to be passed at runtime */
+ Stride(int outerStride, int innerStride)
+ : m_outer(outerStride), m_inner(innerStride)
+ {
+ ei_assert(innerStride>=0 && outerStride>=0);
+ }
+
+ /** Copy constructor */
+ Stride(const Stride& other)
+ : m_outer(other.outer()), m_inner(other.inner())
+ {}
+
+ /** \returns the outer stride */
+ inline int outer() const { return m_outer.value(); }
+ /** \returns the inner stride */
+ inline int inner() const { return m_inner.value(); }
+
+ protected:
+ ei_int_if_dynamic<OuterStrideAtCompileTime> m_outer;
+ ei_int_if_dynamic<InnerStrideAtCompileTime> m_inner;
+};
+
+/** \brief Convenience specialization of Stride to specify only an inner stride */
+template<int Value>
+class InnerStride : public Stride<0, Value>
+{
+ typedef Stride<0, Value> Base;
+ public:
+ InnerStride() : Base() {}
+ InnerStride(int v) : Base(0, v) {}
+};
+
+/** \brief Convenience specialization of Stride to specify only an outer stride */
+template<int Value>
+class OuterStride : public Stride<Value, 0>
+{
+ typedef Stride<Value, 0> Base;
+ public:
+ OuterStride() : Base() {}
+ OuterStride(int v) : Base(v,0) {}
+};
+
+#endif // EIGEN_STRIDE_H
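
// A usage sketch (not part of this patch; buffer and stride values are
// hypothetical) combining Stride with Map, in the spirit of the
// Map_general_stride example referenced above.
#include <Eigen/Core>
using namespace Eigen;
void general_stride_demo()
{
  float data[128];
  // inner stride fixed at compile time (2), outer stride given at runtime (20):
  Map<MatrixXf, Unaligned, Stride<Dynamic, 2> >
      m(data, 5, 5, Stride<Dynamic, 2>(20, 2));
  // m(i, j) reads data[j * 20 + i * 2]
  (void)m;
}
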
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index 186268af0..c3c641097 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -47,7 +47,8 @@ template<typename ExpressionType> class SwapWrapper
inline int rows() const { return m_expression.rows(); }
inline int cols() const { return m_expression.cols(); }
- inline int stride() const { return m_expression.stride(); }
+ inline int outerStride() const { return m_expression.outerStride(); }
+ inline int innerStride() const { return m_expression.innerStride(); }
inline Scalar& coeffRef(int row, int col)
{
@@ -60,7 +61,7 @@ template<typename ExpressionType> class SwapWrapper
}
template<typename OtherDerived>
- void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
+ void copyCoeff(int row, int col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
@@ -71,7 +72,7 @@ template<typename ExpressionType> class SwapWrapper
}
template<typename OtherDerived>
- void copyCoeff(int index, const MatrixBase<OtherDerived>& other)
+ void copyCoeff(int index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size());
@@ -81,7 +82,7 @@ template<typename ExpressionType> class SwapWrapper
}
template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
+ void copyPacket(int row, int col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(row >= 0 && row < rows()
@@ -94,7 +95,7 @@ template<typename ExpressionType> class SwapWrapper
}
template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(int index, const MatrixBase<OtherDerived>& other)
+ void copyPacket(int index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
ei_internal_assert(index >= 0 && index < m_expression.size());
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index bd06d8464..1f064d1c2 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -93,7 +93,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
typedef typename MatrixType::template MakeBase<Transpose<MatrixType> >::Type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
- inline int stride() const { return derived().nestedExpression().stride(); }
+ inline int innerStride() const { return derived().nestedExpression().innerStride(); }
+ inline int outerStride() const { return derived().nestedExpression().outerStride(); }
inline Scalar* data() { return derived().nestedExpression().data(); }
inline const Scalar* data() const { return derived().nestedExpression().data(); }
@@ -295,25 +296,6 @@ struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
static inline const XprType extract(const XprType& x) { return x; }
};
-
-template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
-struct ei_extract_data_selector {
- static typename T::Scalar* run(const T& m)
- {
- return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0);
- }
-};
-
-template<typename T>
-struct ei_extract_data_selector<T,NoDirectAccess> {
- static typename T::Scalar* run(const T&) { return 0; }
-};
-
-template<typename T> typename T::Scalar* ei_extract_data(const T& m)
-{
- return ei_extract_data_selector<T>::run(m);
-}
-
template<typename Scalar, bool DestIsTranposed, typename OtherDerived>
struct ei_check_transpose_aliasing_selector
{
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 7d978b800..2230680d1 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -50,7 +50,8 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
inline int rows() const { return derived().rows(); }
inline int cols() const { return derived().cols(); }
- inline int stride() const { return derived().stride(); }
+ inline int outerStride() const { return derived().outerStride(); }
+ inline int innerStride() const { return derived().innerStride(); }
inline Scalar coeff(int row, int col) const { return derived().coeff(row,col); }
inline Scalar& coeffRef(int row, int col) { return derived().coeffRef(row,col); }
@@ -165,7 +166,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
- inline int stride() const { return m_matrix.stride(); }
+ inline int outerStride() const { return m_matrix.outerStride(); }
+ inline int innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::operator+=() */
template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; }
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index cbf97aeb3..5bb7fd35d 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -86,7 +86,6 @@ template<typename VectorType, int Size> class VectorBlock
IsColVector ? start : 0, IsColVector ? 0 : start,
IsColVector ? size : 1, IsColVector ? 1 : size)
{
-
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
}
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 1526a4b97..449de2078 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -169,6 +169,11 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) {
return res;
}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ ei_assert(false && "packet integer division is not supported by AltiVec");
+ return ei_pset1<int>(0);
+}
+
template<> inline v4f ei_pmadd(const v4f& a, const v4f& b, const v4f& c) { return vec_madd(a, b, c); }
template<> inline v4f ei_pmin(const v4f& a, const v4f& b) { return vec_min(a,b); }
diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt
index 8ddba284e..5470ed8f3 100644
--- a/Eigen/src/Core/arch/CMakeLists.txt
+++ b/Eigen/src/Core/arch/CMakeLists.txt
@@ -1,2 +1,3 @@
ADD_SUBDIRECTORY(SSE)
-ADD_SUBDIRECTORY(AltiVec) \ No newline at end of file
+ADD_SUBDIRECTORY(AltiVec)
+ADD_SUBDIRECTORY(NEON)
diff --git a/Eigen/src/Core/arch/Default/Settings.h b/Eigen/src/Core/arch/Default/Settings.h
new file mode 100644
index 000000000..1e7cebdba
--- /dev/null
+++ b/Eigen/src/Core/arch/Default/Settings.h
@@ -0,0 +1,65 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+
+/* All the parameters defined in this file can be specialized in the
+ * architecture-specific files, and/or by the user.
+ * More to come... */
+
+#ifndef EIGEN_DEFAULT_SETTINGS_H
+#define EIGEN_DEFAULT_SETTINGS_H
+
+/** Defines the maximal loop size to enable meta unrolling of loops.
+ * Note that the value here is expressed in Eigen's own notion of "number of FLOPS";
+ * it does not correspond to the number of iterations or the number of instructions.
+ */
+#ifndef EIGEN_UNROLLING_LIMIT
+#define EIGEN_UNROLLING_LIMIT 100
+#endif
+
+/** Defines the threshold between a "small" and a "large" matrix.
+ * This threshold is mainly used to select the proper product implementation.
+ */
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+/** Defines the maximal size in bytes of blocks fitting in the CPU cache.
+ * The current value is set to generate 256x256 blocks for float.
+ *
+ * Typically, for a single-threaded application, you would set this to 25% of the size of your CPU caches, in bytes.
+ */
+#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
+#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
+#endif
+
+/** Defines the maximal width of the blocks used in the triangular product and solver
+ * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
+ */
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
+#endif
+
+#endif // EIGEN_DEFAULT_SETTINGS_H
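All of these defaults are guarded by #ifndef, so a user can override them simply by defining the macro before including any Eigen header. A minimal sketch of such an override (the cache size and unrolling limit below are assumed example values, not recommendations):

    // Hypothetical override of the tuning knobs above, placed before the
    // first Eigen include. 12 MB is an assumed cache size; following the
    // 25% guideline above, we hand Eigen a quarter of it.
    #define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (12*1024*1024/4)
    #define EIGEN_UNROLLING_LIMIT 50  // trade a little speed for less code bloat
    #include <Eigen/Core>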
diff --git a/Eigen/src/Core/arch/NEON/CMakeLists.txt b/Eigen/src/Core/arch/NEON/CMakeLists.txt
new file mode 100644
index 000000000..fd4d4af50
--- /dev/null
+++ b/Eigen/src/Core/arch/NEON/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h")
+
+INSTALL(FILES
+ ${Eigen_Core_arch_NEON_SRCS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel
+)
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
new file mode 100644
index 000000000..f71b92a75
--- /dev/null
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -0,0 +1,372 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr>
+// Heavily based on Gael's SSE version.
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_PACKET_MATH_NEON_H
+#define EIGEN_PACKET_MATH_NEON_H
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
+#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (4*96*96)
+#endif
+
+typedef float32x4_t Packet4f;
+typedef int32x4_t Packet4i;
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
+
+template<> struct ei_packet_traits<float> : ei_default_packet_traits
+{
+ typedef Packet4f type; enum {size=4};
+ enum {
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+template<> struct ei_packet_traits<int> : ei_default_packet_traits
+{ typedef Packet4i type; enum {size=4}; };
+
+template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
+template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vdupq_n_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return vdupq_n_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a)
+{
+ // NEON initializer lists are in lane order (lane 0 first), so {0,1,2,3} yields a, a+1, a+2, a+3
+ Packet4f countdown = { 0, 1, 2, 3 };
+ return vaddq_f32(ei_pset1(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a)
+{
+ Packet4i countdown = { 0, 1, 2, 3 };
+ return vaddq_s32(ei_pset1(a), countdown);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f inv, restep, div;
+
+ // NEON does not offer a divide instruction, so we have to use a reciprocal approximation.
+ // However, in contrast to other SIMD engines (AltiVec/SSE), NEON offers
+ // a reciprocal estimate AND a reciprocal step, which saves a few instructions.
+ // vrecpeq_f32() returns an estimate of 1/b, which we then refine with one
+ // Newton-Raphson iteration via vrecpsq_f32().
+ inv = vrecpeq_f32(b);
+
+ // This returns a differential, by which we will have to multiply inv to get a better
+ // approximation of 1/b.
+ restep = vrecpsq_f32(b, inv);
+ inv = vmulq_f32(restep, inv);
+
+ // Finally, multiply a by 1/b to get the desired result of the division.
+ div = vmulq_f32(a, inv);
+
+ return div;
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ ei_assert(false && "packet integer division is not supported by NEON");
+ return ei_pset1<int>(0);
+}
+
+// for some weird reason, this has to be overloaded for packets of integers
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
+
+// Logical operations are not supported for float, so we have to reinterpret-cast to/from unsigned integers using NEON intrinsics
+template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
+
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
+
+// FIXME only store the first 2 elements?
+template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r64, a_r128;
+
+ a_r64 = vrev64q_f32(a);
+ a_lo = vget_low_f32(a_r64);
+ a_hi = vget_high_f32(a_r64);
+ a_r128 = vcombine_f32(a_hi, a_lo);
+
+ return a_r128;
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
+ int32x2_t a_lo, a_hi;
+ Packet4i a_r64, a_r128;
+
+ a_r64 = vrev64q_s32(a);
+ a_lo = vget_low_s32(a_r64);
+ a_hi = vget_high_s32(a_r64);
+ a_r128 = vcombine_s32(a_hi, a_lo);
+
+ return a_r128;
+}
+template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }
+
+template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, sum;
+ float s[2];
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ sum = vpadd_f32(a_lo, a_hi);
+ sum = vpadd_f32(sum, sum);
+ vst1_f32(s, sum);
+
+ return s[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
+{
+ float32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4f sum1, sum2, sum;
+
+ // NEON zip performs interleaving of the supplied vectors.
+ // We perform two interleaves in a row to obtain the transposed vectors
+ vtrn1 = vzipq_f32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_f32(vecs[1], vecs[3]);
+ res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
+
+ // Do the addition of the resulting vectors
+ sum1 = vaddq_f32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_f32(res2.val[0], res2.val[1]);
+ sum = vaddq_f32(sum1, sum2);
+
+ return sum;
+}
+
+template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, sum;
+ int32_t s[2];
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ sum = vpadd_s32(a_lo, a_hi);
+ sum = vpadd_s32(sum, sum);
+ vst1_s32(s, sum);
+
+ return s[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
+{
+ int32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4i sum1, sum2, sum;
+
+ // NEON zip performs interleaving of the supplied vectors.
+ // We perform two interleaves in a row to obtain the transposed vectors
+ vtrn1 = vzipq_s32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_s32(vecs[1], vecs[3]);
+ res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
+
+ // Do the addition of the resulting vectors
+ sum1 = vaddq_s32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_s32(res2.val[0], res2.val[1]);
+ sum = vaddq_s32(sum1, sum2);
+
+ return sum;
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, prod;
+ float s[2];
+
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+ prod = vmul_f32(a_lo, a_hi);
+ // Multiply prod by its swapped value |a2*a4|a1*a3|
+ prod = vmul_f32(prod, vrev64_f32(prod));
+ vst1_f32(s, prod);
+
+ return s[0];
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, prod;
+ int32_t s[2];
+
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+ prod = vmul_s32(a_lo, a_hi);
+ // Multiply prod by its swapped value |a2*a4|a1*a3|
+ prod = vmul_s32(prod, vrev64_s32(prod));
+ vst1_s32(s, prod);
+
+ return s[0];
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, min;
+ float s[2];
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ min = vpmin_f32(a_lo, a_hi);
+ min = vpmin_f32(min, min);
+ vst1_f32(s, min);
+
+ return s[0];
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, min;
+ int32_t s[2];
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ min = vpmin_s32(a_lo, a_hi);
+ min = vpmin_s32(min, min);
+ vst1_s32(s, min);
+
+ return s[0];
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, max;
+ float s[2];
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ max = vpmax_f32(a_lo, a_hi);
+ max = vpmax_f32(max, max);
+ vst1_f32(s, max);
+
+ return s[0];
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, max;
+ int32_t s[2];
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ max = vpmax_s32(a_lo, a_hi);
+ max = vpmax_s32(max, max);
+ vst1_s32(s, max);
+
+ return s[0];
+}
+
+template<int Offset>
+struct ei_palign_impl<Offset,Packet4f>
+{
+ EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset!=0)
+ first = vextq_f32(first, second, Offset);
+ }
+};
+
+template<int Offset>
+struct ei_palign_impl<Offset,Packet4i>
+{
+ EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset!=0)
+ first = vextq_s32(first, second, Offset);
+ }
+};
+#endif // EIGEN_PACKET_MATH_NEON_H
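For reference, the ei_pdiv<Packet4f> kernel above performs one Newton-Raphson refinement of the hardware reciprocal estimate: given an estimate x0 of 1/b, vrecpsq_f32(b, x0) returns the correction term (2 - b*x0), and x1 = x0*(2 - b*x0) roughly squares the relative error. A scalar model of that refinement, in plain C++ with no NEON assumed (the 2% perturbation stands in for the low-precision hardware estimate):

    #include <cassert>
    #include <cmath>

    // Scalar model of the reciprocal sequence in ei_pdiv<Packet4f>.
    float approx_div(float a, float b)
    {
      float x = (1.0f / b) * 1.02f;    // crude estimate, like vrecpeq_f32
      float restep = 2.0f - b * x;     // what vrecpsq_f32(b, x) returns
      x = restep * x;                  // Newton-Raphson: x1 = x0*(2 - b*x0)
      return a * x;                    // a/b ~= a * (1/b)
    }

    int main()
    {
      // one refinement turns a ~2% error into ~0.04%
      assert(std::fabs(approx_div(10.0f, 4.0f) - 2.5f) < 1e-2f);
    }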
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index de96aaffa..282a1971c 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
-#ifdef __SSE4_1__
+#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b);
#else
// this version is slightly faster than 4 scalar products
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
{
- #ifdef __SSSE3__
+ #ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a);
#else
Packet4i aux = _mm_srai_epi32(a,31);
@@ -285,7 +285,7 @@ EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs)
vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
}
-#ifdef __SSE3__
+#ifdef EIGEN_VECTORIZE_SSE3
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
{
@@ -446,7 +446,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
// }
#endif
-#ifdef __SSSE3__
+#ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 3343b1875..e8016e915 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
- Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
+ res = lhs.row(row).cwiseProduct(rhs.col(col)).sum();
}
};
@@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Lhs,
- Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
+ res = lhs.cwiseProduct(rhs.col(col)).sum();
}
};
@@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
- Rhs,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
+ res = lhs.row(row).cwiseProduct(rhs).sum();
}
};
@@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
- res = ei_dot_impl<
- Lhs,
- Rhs,
- LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
+ res = lhs.cwiseProduct(rhs).sum();
}
};
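The simplification above relies on the fact that coefficient (i,j) of a matrix product is the dot product of row i of the left factor with column j of the right factor, and that Eigen can evaluate this directly as a coefficient-wise product reduced by sum(). A user-level sketch of the same identity (the transpose() is needed here to make the shapes of the two vectors agree; the internal Block types line up on their own):

    #include <Eigen/Core>
    #include <cassert>
    #include <cmath>

    int main()
    {
      Eigen::MatrixXd lhs = Eigen::MatrixXd::Random(4, 5);
      Eigen::MatrixXd rhs = Eigen::MatrixXd::Random(5, 3);
      // coefficient (1,2) of the product, written as a cwise-product reduction
      double d = lhs.row(1).cwiseProduct(rhs.col(2).transpose()).sum();
      assert(std::abs(d - (lhs * rhs)(1, 2)) < 1e-12);
    }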
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 6836a10de..8ac5afb05 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -31,6 +31,7 @@
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
#else
#define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T);
+// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T);
#endif
// optimized GEneral packed Block * packed Panel product kernel
@@ -146,7 +147,7 @@ struct ei_gebp_kernel
#endif
// performs "inner" product
- // TODO let's check wether the flowing peeled loop could not be
+ // TODO let's check whether the following peeled loop could not be
// optimized via optimal prefetching from one loop to the other
const Scalar* blB = unpackedB;
for(int k=0; k<peeled_kc; k+=4)
@@ -409,6 +410,7 @@ struct ei_gebp_kernel
CJMADD(A0,B2,C2,B2);
B2 = ei_pload(&blB[14*PacketSize]);
CJMADD(A0,B3,C3,B3);
+
A0 = ei_pload(&blA[3*PacketSize]);
B3 = ei_pload(&blB[15*PacketSize]);
CJMADD(A0,B0,C0,B0);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 785045db4..2e71b5fd4 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -43,7 +43,10 @@ struct ei_symm_pack_lhs
{
for(int w=0; w<h; w++)
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
- for(int w=h; w<BlockRows; w++)
+
+ blockA[count++] = ei_real(lhs(k,k)); // real (diagonal)
+
+ for(int w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
++h;
}
@@ -71,8 +74,11 @@ struct ei_symm_pack_lhs
// do the same with mr==1
for(int i=peeled_mc; i<rows; i++)
{
- for(int k=0; k<=i; k++)
+ for(int k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
+
+ blockA[count++] = ei_real(lhs(i, i)); // real (diagonal)
+
for(int k=i+1; k<cols; k++)
blockA[count++] = ei_conj(lhs(k, i)); // transposed
}
@@ -129,8 +135,11 @@ struct ei_symm_pack_rhs
// normal
for (int w=0 ; w<h; ++w)
blockB[count+w] = alpha*rhs(k,j2+w);
+
+ blockB[count+h] = alpha*rhs(k,k);
+
// transpose
- for (int w=h ; w<nr; ++w)
+ for (int w=h+1 ; w<nr; ++w)
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
count += nr;
++h;
@@ -175,8 +184,15 @@ struct ei_symm_pack_rhs
blockB[count] = alpha*ei_conj(rhs(j2,k));
count += 1;
}
+
+ if(half==j2)
+ {
+ blockB[count] = alpha*ei_real(rhs(j2,j2));
+ count += 1;
+ }
+
// normal
- for(int k=half; k<k2+rows; k++)
+ for(int k=half+1; k<k2+rows; k++)
{
blockB[count] = alpha*rhs(k,j2);
count += 1;
@@ -389,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
* RhsBlasTraits::extractScalarFactor(m_rhs);
ei_product_selfadjoint_matrix<Scalar,
- EIGEN_LOGICAL_XOR(LhsIsUpper,
+ EIGEN_LOGICAL_XOR(LhsIsUpper,
ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 2ca463d5d..4d216d77a 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -236,4 +236,22 @@ struct ei_blas_traits<Transpose<NestedXpr> >
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
+template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
+struct ei_extract_data_selector {
+ static const typename T::Scalar* run(const T& m)
+ {
+ return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); // FIXME this should be .data()
+ }
+};
+
+template<typename T>
+struct ei_extract_data_selector<T,NoDirectAccess> {
+ static typename T::Scalar* run(const T&) { return 0; }
+};
+
+template<typename T> const typename T::Scalar* ei_extract_data(const T& m)
+{
+ return ei_extract_data_selector<T>::run(m);
+}
+
#endif // EIGEN_BLASUTIL_H
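ei_extract_data_selector is an instance of trait-directed dispatch: a defaulted integer template parameter, computed from ei_blas_traits, routes the call either to the direct-access implementation or to the null fallback. A freestanding sketch of the pattern with hypothetical names (has_data, data_of, and the std::vector trait are illustrative, not Eigen API):

    #include <vector>

    // Trait: 1 if the container exposes contiguous storage, else 0.
    template<typename T> struct has_data { enum { value = 0 }; };
    template<typename S> struct has_data<std::vector<S> > { enum { value = 1 }; };

    // Selector dispatched on the trait, mirroring ei_extract_data_selector.
    template<typename T, int Access = has_data<T>::value>
    struct data_of_selector
    {
      static const typename T::value_type* run(const T&) { return 0; } // no direct access
    };
    template<typename T>
    struct data_of_selector<T, 1>
    {
      static const typename T::value_type* run(const T& c) { return &c[0]; }
    };

    template<typename T>
    const typename T::value_type* data_of(const T& c)
    { return data_of_selector<T>::run(c); }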
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 51590b03d..c167df697 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -86,11 +86,11 @@ const unsigned int EvalBeforeAssigningBit = 0x4;
* Long version: means that the coefficients can be handled by packets
* and start at a memory location whose alignment meets the requirements
* of the present CPU architecture for optimized packet access. In the fixed-size
- * case, there is the additional condition that the total size of the coefficients
- * array is a multiple of the packet size, so that it is possible to access all the
- * coefficients by packets. In the dynamic-size case, there is no such condition
- * on the total size, so it might not be possible to access the few last coeffs
- * by packets.
+ * case, there is the additional condition that it be possible to access all the
+ * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,
+ * and that any nontrivial strides don't break the alignment). In the dynamic-size case,
+ * there is no such condition on the total size and strides, so it might not be possible to access
+ * all coeffs by packets.
*
* \note This bit can be set regardless of whether vectorization is actually enabled.
* To check for actual vectorizability, see \a ActualPacketAccessBit.
@@ -140,7 +140,7 @@ const unsigned int LinearAccessBit = 0x10;
* Means that the underlying array of coefficients can be directly accessed. This means two things.
* First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only
* expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the
- * array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit.
+ * array of coefficients must be exactly the natural one suggested by rows(), cols(), outerStride(), innerStride(), and the RowMajorBit.
* This rules out expressions such as Diagonal, whose coefficients, though referencable, do not have
* such a regular memory layout.
*/
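Concretely, whether a plain matrix type carries AlignedBit can be read directly off its Flags enum. A sketch, assuming a build with SSE vectorization enabled so that a float packet holds 4 scalars:

    #include <Eigen/Core>

    int main()
    {
      // Vector4f's 16-byte array can be covered entirely by packets;
      // Vector3f's 12-byte array cannot, so it must not advertise AlignedBit.
      bool v4 = (Eigen::Vector4f::Flags & Eigen::AlignedBit) != 0;
      bool v3 = (Eigen::Vector3f::Flags & Eigen::AlignedBit) != 0;
      return (v4 && !v3) ? 0 : 1;
    }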
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index a56e4fb84..aa01fdab2 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -1,7 +1,8 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -40,8 +41,10 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
template<typename MatrixType> class Minor;
+// MSVC will not compile when the expression ei_traits<MatrixType>::Flags&DirectAccessBit
+// is wrapped in parentheses, as in (ei_traits<MatrixType>::Flags&DirectAccessBit)!
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic,
- int _DirectAccessStatus = (ei_traits<MatrixType>::Flags&DirectAccessBit) ? HasDirectAccess : NoDirectAccess> class Block;
+ int _DirectAccessStatus = ei_traits<MatrixType>::Flags&DirectAccessBit ? HasDirectAccess : NoDirectAccess> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
template<typename MatrixType> class Transpose;
template<typename MatrixType> class Conjugate;
@@ -60,7 +63,9 @@ template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeA
template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
template<typename MatrixType, int Index> class Diagonal;
-template<typename MatrixType, int Options=Unaligned> class Map;
+template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<typename MatrixType, int Options=Unaligned, typename StrideType = Stride<0,0> > class Map;
+
template<typename Derived> class TriangularBase;
template<typename MatrixType, unsigned int Mode> class TriangularView;
template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
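The new StrideType parameter on Map is backed by the Stride.h header added elsewhere in this change set. A usage sketch of the API being introduced here (treat the exact spelling as tentative, since this is the series that adds it): mapping every other element of a raw buffer through a compile-time inner stride.

    #include <Eigen/Core>

    int main()
    {
      float data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      // InnerStride<2>: logically consecutive coefficients sit 2 floats apart
      Eigen::Map<Eigen::VectorXf, Eigen::Unaligned, Eigen::InnerStride<2> > v(data, 4);
      return (v[3] == 6.0f) ? 0 : 1;  // v = [0, 2, 4, 6]
    }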
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index ba92f2370..7968d6604 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -39,7 +39,7 @@
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable 16 byte alignment on all
// platforms where vectorization might be enabled. In theory we could always enable alignment, but it can be a cause of problems
// on some platforms, so we just disable it in certain common platform (compiler+architecture combinations) to avoid these problems.
-#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__))
+#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__) || defined(__ARM_NEON__))
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 0
@@ -78,30 +78,6 @@
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
#endif
-/** Defines the maximal loop size to enable meta unrolling of loops.
- * Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
- * it does not correspond to the number of iterations or the number of instructions
- */
-#ifndef EIGEN_UNROLLING_LIMIT
-#define EIGEN_UNROLLING_LIMIT 100
-#endif
-
-/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
- * The current value is set to generate blocks of 256x256 for float
- *
- * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
- */
-#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
-#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
-#endif
-
-/** Defines the maximal width of the blocks used in the triangular product and solver
- * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
- */
-#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
-#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
-#endif
-
/** Allows to disable some optimizations which might affect the accuracy of the result.
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
* They currently include:
@@ -211,7 +187,7 @@ using Eigen::ei_cos;
*/
#if !EIGEN_ALIGN
#define EIGEN_ALIGN_TO_BOUNDARY(n)
-#elif (defined __GNUC__)
+#elif (defined __GNUC__) || (defined __PGI)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif (defined _MSC_VER)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
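With this change, EIGEN_ALIGN_TO_BOUNDARY maps to __attribute__((aligned(n))) on gcc and PGI and to __declspec(align(n)) on MSVC. A sketch of its intended use, assuming a build where EIGEN_ALIGN is enabled (otherwise the macro expands to nothing and the check below may fail):

    #include <Eigen/Core>
    #include <cassert>
    #include <cstddef>

    int main()
    {
      EIGEN_ALIGN_TO_BOUNDARY(16) float buf[4];  // 16-byte-aligned local array
      assert(reinterpret_cast<std::size_t>(buf) % 16 == 0);
      (void)buf;
    }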
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c7b95d334..4d037b998 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -4,6 +4,7 @@
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -24,26 +25,49 @@
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+/*****************************************************************************
+*** Platform checks for aligned malloc functions ***
+*****************************************************************************/
+
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
+// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
+// This is true at least since glibc 2.8.
+// This leaves the question how to detect 64-bit. According to this document,
+// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
+// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
+// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
+#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
+ && defined(__LP64__)
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
+#endif
+
// FreeBSD 6 seems to have 16-byte aligned malloc
-// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
+// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
-// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
+// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
-#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
-#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
-#if defined(__APPLE__) || defined(_WIN64) || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
+#if defined(__APPLE__) \
+ || defined(_WIN64) \
+ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
+ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
-#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
+ && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
#else
#define EIGEN_HAS_POSIX_MEMALIGN 0
@@ -55,26 +79,90 @@
#define EIGEN_HAS_MM_MALLOC 0
#endif
-/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
- * Fast, but wastes 16 additional bytes of memory.
- * Does not throw any exception.
+/*****************************************************************************
+*** Implementation of handmade aligned functions ***
+*****************************************************************************/
+
+/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
+
+/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
+ * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
inline void* ei_handmade_aligned_malloc(size_t size)
{
void *original = std::malloc(size+16);
+ if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
-/** \internal frees memory allocated with ei_handmade_aligned_malloc */
+/** \internal Frees memory allocated with ei_handmade_aligned_malloc */
inline void ei_handmade_aligned_free(void *ptr)
{
- if(ptr)
- std::free(*(reinterpret_cast<void**>(ptr) - 1));
+ if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+
+/** \internal
+ * \brief Reallocates aligned memory.
+ * Since we know that our handmade version is based on std::malloc,
+ * we can use std::realloc to implement efficient reallocation.
+ */
+inline void* ei_handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+{
+ if (ptr == 0) return ei_handmade_aligned_malloc(size);
+ void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ original = std::realloc(original,size+16);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+
+/*****************************************************************************
+*** Implementation of generic aligned realloc (when no realloc can be used)***
+*****************************************************************************/
+
+void* ei_aligned_malloc(size_t size);
+void ei_aligned_free(void *ptr);
+
+/** \internal
+ * \brief Reallocates aligned memory.
+ * Allows reallocation with aligned pointer types. This implementation will
+ * always create a new memory chunk and copy the old data.
+ */
+inline void* ei_generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
+{
+ if (ptr==0)
+ return ei_aligned_malloc(size);
+
+ if (size==0)
+ {
+ ei_aligned_free(ptr);
+ return 0;
+ }
+
+ void* newptr = ei_aligned_malloc(size);
+ if (newptr == 0)
+ {
+ errno = ENOMEM; // according to the standard
+ return 0;
+ }
+
+ if (ptr != 0)
+ {
+ std::memcpy(newptr, ptr, std::min(size,old_size));
+ ei_aligned_free(ptr);
+ }
+
+ return newptr;
}
-/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
+/*****************************************************************************
+*** Implementation of portable aligned versions of malloc/free/realloc ***
+*****************************************************************************/
+
+/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
*/
inline void* ei_aligned_malloc(size_t size)
@@ -85,9 +173,9 @@ inline void* ei_aligned_malloc(size_t size)
void *result;
#if !EIGEN_ALIGN
- result = malloc(size);
+ result = std::malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
- result = malloc(size);
+ result = std::malloc(size);
#elif EIGEN_HAS_POSIX_MEMALIGN
if(posix_memalign(&result, 16, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
@@ -105,7 +193,67 @@ inline void* ei_aligned_malloc(size_t size)
return result;
}
-/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
+/** \internal Frees memory allocated with ei_aligned_malloc. */
+inline void ei_aligned_free(void *ptr)
+{
+ #if !EIGEN_ALIGN
+ std::free(ptr);
+ #elif EIGEN_MALLOC_ALREADY_ALIGNED
+ std::free(ptr);
+ #elif EIGEN_HAS_POSIX_MEMALIGN
+ std::free(ptr);
+ #elif EIGEN_HAS_MM_MALLOC
+ _mm_free(ptr);
+ #elif defined(_MSC_VER)
+ _aligned_free(ptr);
+ #else
+ ei_handmade_aligned_free(ptr);
+ #endif
+}
+
+/**
+* \internal
+* \brief Reallocates an aligned block of memory.
+* \throws std::bad_alloc if EIGEN_EXCEPTIONS is defined.
+**/
+inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size)
+{
+ (void)old_size; // Suppress 'unused variable' warning. Seen in boost tee.
+
+ void *result;
+#if !EIGEN_ALIGN
+ result = std::realloc(ptr,new_size);
+#elif EIGEN_MALLOC_ALREADY_ALIGNED
+ result = std::realloc(ptr,new_size);
+#elif EIGEN_HAS_POSIX_MEMALIGN
+ result = ei_generic_aligned_realloc(ptr,new_size,old_size);
+#elif EIGEN_HAS_MM_MALLOC
+ // The defined(_mm_free) is just here to verify that this MSVC version
+ // implements _mm_malloc/_mm_free based on the corresponding _aligned_
+ // functions. This may not always be the case and we just try to be safe.
+ #if defined(_MSC_VER) && defined(_mm_free)
+ result = _aligned_realloc(ptr,new_size,16);
+ #else
+ result = ei_generic_aligned_realloc(ptr,new_size,old_size);
+ #endif
+#elif defined(_MSC_VER)
+ result = _aligned_realloc(ptr,new_size,16);
+#else
+ result = ei_handmade_aligned_realloc(ptr,new_size,old_size);
+#endif
+
+#ifdef EIGEN_EXCEPTIONS
+ if (result==0 && new_size!=0)
+ throw std::bad_alloc();
+#endif
+ return result;
+}
+
+/*****************************************************************************
+*** Implementation of conditionally aligned functions ***
+*****************************************************************************/
+
+/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
*/
template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
@@ -126,63 +274,41 @@ template<> inline void* ei_conditional_aligned_malloc<false>(size_t size)
return result;
}
-/** \internal construct the elements of an array.
- * The \a size parameter tells on how many objects to call the constructor of T.
- */
-template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t size)
+/** \internal Frees memory allocated with ei_conditional_aligned_malloc */
+template<bool Align> inline void ei_conditional_aligned_free(void *ptr)
{
- for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
- return ptr;
+ ei_aligned_free(ptr);
}
-/** allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
- * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
- * The default constructor of T is called.
- */
-template<typename T> inline T* ei_aligned_new(size_t size)
+template<> inline void ei_conditional_aligned_free<false>(void *ptr)
{
- T *result = reinterpret_cast<T*>(ei_aligned_malloc(sizeof(T)*size));
- return ei_construct_elements_of_array(result, size);
+ std::free(ptr);
}
-template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t size)
+template<bool Align> inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
{
- T *result = reinterpret_cast<T*>(ei_conditional_aligned_malloc<Align>(sizeof(T)*size));
- return ei_construct_elements_of_array(result, size);
+ return ei_aligned_realloc(ptr, new_size, old_size);
}
-/** \internal free memory allocated with ei_aligned_malloc
- */
-inline void ei_aligned_free(void *ptr)
+template<> inline void* ei_conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
{
- #if !EIGEN_ALIGN
- free(ptr);
- #elif EIGEN_MALLOC_ALREADY_ALIGNED
- free(ptr);
- #elif EIGEN_HAS_POSIX_MEMALIGN
- free(ptr);
- #elif EIGEN_HAS_MM_MALLOC
- _mm_free(ptr);
- #elif defined(_MSC_VER)
- _aligned_free(ptr);
- #else
- ei_handmade_aligned_free(ptr);
- #endif
+ return std::realloc(ptr, new_size);
}
-/** \internal free memory allocated with ei_conditional_aligned_malloc
- */
-template<bool Align> inline void ei_conditional_aligned_free(void *ptr)
-{
- ei_aligned_free(ptr);
-}
+/*****************************************************************************
+*** Construction/destruction of array elements ***
+*****************************************************************************/
-template<> inline void ei_conditional_aligned_free<false>(void *ptr)
+/** \internal Constructs the elements of an array.
+ * The \a size parameter gives the number of objects on which to call the constructor of T.
+ */
+template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t size)
{
- std::free(ptr);
+ for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
+ return ptr;
}
-/** \internal destruct the elements of an array.
+/** \internal Destructs the elements of an array.
* The \a size parameters tells on how many objects to call the destructor of T.
*/
template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
@@ -191,7 +317,27 @@ template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t si
while(size) ptr[--size].~T();
}
-/** \internal delete objects constructed with ei_aligned_new
+/*****************************************************************************
+*** Implementation of aligned new/delete-like functions ***
+*****************************************************************************/
+
+/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
+ * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
+ * The default constructor of T is called.
+ */
+template<typename T> inline T* ei_aligned_new(size_t size)
+{
+ T *result = reinterpret_cast<T*>(ei_aligned_malloc(sizeof(T)*size));
+ return ei_construct_elements_of_array(result, size);
+}
+
+template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t size)
+{
+ T *result = reinterpret_cast<T*>(ei_conditional_aligned_malloc<Align>(sizeof(T)*size));
+ return ei_construct_elements_of_array(result, size);
+}
+
+/** \internal Deletes objects constructed with ei_aligned_new
+ * The \a size parameter gives the number of objects on which to call the destructor of T.
*/
template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
@@ -200,7 +346,7 @@ template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
ei_aligned_free(ptr);
}
-/** \internal delete objects constructed with ei_conditional_aligned_new
+/** \internal Deletes objects constructed with ei_conditional_aligned_new
* The \a size parameters tells on how many objects to call the destructor of T.
*/
template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *ptr, size_t size)
@@ -209,7 +355,17 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
ei_conditional_aligned_free<Align>(ptr);
}
-/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
+template<typename T, bool Align> inline T* ei_conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
+{
+ T *result = reinterpret_cast<T*>(ei_conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+ if (new_size > old_size)
+ ei_construct_elements_of_array(result+old_size, new_size-old_size);
+ return result;
+}
+
+/****************************************************************************/
+
+/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
*
* \param array the address of the start of the array
* \param size the size of the array
@@ -236,7 +392,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
if(PacketSize==1)
{
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
- // of the array have the same aligment.
+ // of the array have the same alignment.
return 0;
}
else if(size_t(array) & (sizeof(Scalar)-1))
@@ -252,19 +408,23 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
}
}
+/*****************************************************************************
+*** Implementation of runtime stack allocation (falling back to malloc) ***
+*****************************************************************************/
+
/** \internal
- * ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
- * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and
- * if stack allocation is supported by the platform (currently, this is linux only).
- * Otherwise the memory is allocated on the heap.
- * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
+ * Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than
+ * EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+ * (currently, this is Linux only). Otherwise the memory is allocated on the heap.
+ * Data allocated with ei_aligned_stack_alloc \b must be freed by calling
+ * ei_aligned_stack_free(PTR,SIZE).
* \code
* float * data = ei_aligned_stack_alloc(float,array.size());
* // ...
* ei_aligned_stack_free(data,float,array.size());
* \endcode
*/
-#ifdef __linux__
+#if (defined __linux__) && !(defined __ARM_NEON__)
#define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
? alloca(SIZE) \
: ei_aligned_malloc(SIZE)
@@ -279,6 +439,10 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
+/*****************************************************************************
+*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
+*****************************************************************************/
+
#if EIGEN_ALIGN
#ifdef EIGEN_EXCEPTIONS
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
@@ -322,10 +486,11 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))
+/****************************************************************************/
/** \class aligned_allocator
*
-* \brief stl compatible allocator to use with with 16 byte aligned types
+* \brief STL-compatible allocator to use with 16-byte-aligned types
*
* Example:
* \code
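The bookkeeping behind the handmade aligned malloc/free/realloc above is easiest to see in a standalone model: over-allocate by 16 bytes, round the result up to the next 16-byte boundary, and stash the original pointer in the slot just below the returned address so that free can recover it. A self-contained sketch:

    #include <cassert>
    #include <cstddef>
    #include <cstdlib>

    // Standalone model of ei_handmade_aligned_malloc/free.
    void* aligned_malloc16(std::size_t size)
    {
      void* original = std::malloc(size + 16);
      if (original == 0) return 0;
      // Round down to a 16-byte boundary, then step one boundary up; this
      // always leaves at least sizeof(void*) bytes of slack below 'aligned'.
      void* aligned = reinterpret_cast<void*>(
          (reinterpret_cast<std::size_t>(original) & ~std::size_t(15)) + 16);
      *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash for free()
      return aligned;
    }

    void aligned_free16(void* ptr)
    {
      if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
    }

    int main()
    {
      void* p = aligned_malloc16(100);
      assert(reinterpret_cast<std::size_t>(p) % 16 == 0);
      aligned_free16(p);
    }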
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 619e7664d..5252b28c5 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -46,7 +46,7 @@
// if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
- #else // CXX0X
+ #else // not CXX0X
template<bool condition>
struct ei_static_assert {};
@@ -81,7 +81,8 @@
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
- THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE
};
};
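In the non-static_assert branch, the machinery reduces to the classic C++03 trick: only the true specialization defines the named constant, so instantiating the false case yields a compile error whose text contains the message enum. A condensed model (my_static_assert is an illustrative name, not the Eigen one):

    // Primary template intentionally left undefined.
    template<bool condition> struct my_static_assert;
    template<> struct my_static_assert<true>
    {
      enum { YOU_ALREADY_SPECIFIED_THIS_STRIDE };
    };

    // Compiles only when the condition holds; otherwise the error message
    // names the enum, e.g. YOU_ALREADY_SPECIFIED_THIS_STRIDE.
    enum { check = my_static_assert<(sizeof(void*) >= 4)>::YOU_ALREADY_SPECIFIED_THIS_STRIDE };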
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index f1ed4ed9f..eff055b04 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -50,7 +50,7 @@ template<int Value> class ei_int_if_dynamic
{
public:
EIGEN_EMPTY_STRUCT_CTOR(ei_int_if_dynamic)
- explicit ei_int_if_dynamic(int) {}
+ explicit ei_int_if_dynamic(int v) { EIGEN_ONLY_USED_FOR_DEBUG(v); ei_assert(v == Value); }
static int value() { return Value; }
void setValue(int) {}
};
@@ -58,7 +58,7 @@ template<int Value> class ei_int_if_dynamic
template<> class ei_int_if_dynamic<Dynamic>
{
int m_value;
- ei_int_if_dynamic() {}
+ ei_int_if_dynamic() { ei_assert(false); }
public:
explicit ei_int_if_dynamic(int value) : m_value(value) {}
int value() const { return m_value; }
@@ -87,12 +87,17 @@ class ei_compute_matrix_flags
{
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
- inner_max_size = MaxCols==1 ? MaxRows
- : MaxRows==1 ? MaxCols
- : row_major_bit ? MaxCols : MaxRows,
- is_big = inner_max_size == Dynamic,
- is_packet_size_multiple = MaxRows==Dynamic || MaxCols==Dynamic || ((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0,
- aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0,
+ is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
+#if !defined(__ARM_NEON__)
+ is_fixed_size_aligned
+ = (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0),
+#else
+// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly
+ is_fixed_size_aligned = 0,
+#endif
+ aligned_bit = (((Options&DontAlign)==0)
+ && (is_dynamic_size_storage || is_fixed_size_aligned))
+ ? AlignedBit : 0,
packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
};
@@ -214,7 +219,7 @@ struct ei_is_reference<T&>
};
/**
-* The reference selector for template expressions. The idea is that we don't
+* \internal The reference selector for template expressions. The idea is that we don't
* need to use references for expressions since they are light weight proxy
* objects which should generate no copying overhead.
**/
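ei_int_if_dynamic stores an int only in its Dynamic specialization; in the fixed-size case the value lives purely in the type, and the assertions added here make accidental mismatches fail loudly in debug builds. A sketch of the idea with illustrative names (maybe_int, Dyn):

    #include <cassert>

    const int Dyn = -1;

    template<int Value> struct maybe_int
    {
      explicit maybe_int(int v) { (void)v; assert(v == Value); }  // debug check only
      static int value() { return Value; }  // no per-object storage
    };
    template<> struct maybe_int<Dyn>
    {
      int m_value;  // one int of storage, paid only in the dynamic case
      explicit maybe_int(int v) : m_value(v) {}
      int value() const { return m_value; }
    };

    int main()
    {
      maybe_int<4> fixed(4);
      maybe_int<Dyn> dynamic(7);
      return (fixed.value() == 4 && dynamic.value() == 7) ? 0 : 1;
    }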
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 5deac3247..c45151e82 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -154,6 +154,14 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
m_matT = hess.matrixH();
if(!skipU) m_matU = hess.matrixQ();
+
+ // Reduce the Hessenberg matrix m_matT to triangular form by QR iteration.
+
+ // The matrix m_matT is divided in three parts.
+ // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero.
+ // Rows il,...,iu are the part we are working on (the active submatrix).
+ // Rows iu+1,...,end have already been brought into triangular form.
+
int iu = m_matT.cols() - 1;
int il;
RealScalar d,sd,sf;
@@ -164,7 +172,7 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
int iter = 0;
while(true)
{
- //locate the range in which to iterate
+ // find iu, the bottom row of the active submatrix
while(iu > 0)
{
d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1));
@@ -183,10 +191,11 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
if(iter >= 30)
{
// FIXME : what to do when iter==MAXITER ??
- std::cerr << "MAXITER" << std::endl;
+ //std::cerr << "MAXITER" << std::endl;
return;
}
+ // find il, the top row of the active submatrix
il = iu-1;
while(il > 0)
{
@@ -202,16 +211,19 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
if( il != 0 ) m_matT.coeffRef(il,il-1) = Complex(0);
- // compute the shift (the normalization by sf is to avoid under/overflow)
+ // compute the shift kappa as one of the eigenvalues of the 2x2
+ // diagonal block on the bottom of the active submatrix
+
Matrix<Scalar,2,2> t = m_matT.template block<2,2>(iu-1,iu-1);
sf = t.cwiseAbs().sum();
- t /= sf;
-
- c = t.determinant();
- b = t.diagonal().sum();
+ t /= sf; // the normalization by sf is to avoid under/overflow
- disc = ei_sqrt(b*b - RealScalar(4)*c);
+ b = t.coeff(0,1) * t.coeff(1,0);
+ c = t.coeff(0,0) - t.coeff(1,1);
+ disc = ei_sqrt(c*c + RealScalar(4)*b);
+ c = t.coeff(0,0) * t.coeff(1,1) - b;
+ b = t.coeff(0,0) + t.coeff(1,1);
r1 = (b+disc)/RealScalar(2);
r2 = (b-disc)/RealScalar(2);
@@ -224,6 +236,12 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
kappa = sf * r1;
else
kappa = sf * r2;
+
+ if (iter == 10 || iter == 20)
+ {
+ // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f
+ kappa = ei_abs(ei_real(m_matT.coeff(iu,iu-1))) + ei_abs(ei_real(m_matT.coeff(iu-1,iu-2)));
+ }
// perform the QR step using Givens rotations
PlanarRotation<Complex> rot;
@@ -246,18 +264,6 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
}
}
- // FIXME : is it necessary ?
- /*
- for(int i=0 ; i<n ; i++)
- for(int j=0 ; j<n ; j++)
- {
- if(ei_abs(ei_real(m_matT.coeff(i,j))) < eps)
- ei_real_ref(m_matT.coeffRef(i,j)) = 0;
- if(ei_imag(ei_abs(m_matT.coeff(i,j))) < eps)
- ei_imag_ref(m_matT.coeffRef(i,j)) = 0;
- }
- */
-
m_isInitialized = true;
m_matUisUptodate = !skipU;
}
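The shift kappa chosen above is one of the two eigenvalues of the trailing 2x2 diagonal block, i.e. a root of lambda^2 - trace*lambda + det = 0; the rewritten code just evaluates the same discriminant in the form (t00-t11)^2 + 4*t01*t10, which is less prone to cancellation. A scalar sketch of the quadratic, in real arithmetic for readability (the Schur code itself works on complex scalars):

    #include <cassert>
    #include <cmath>

    int main()
    {
      // eigenvalues of [[4, 1], [2, 3]] via trace/determinant
      double t00 = 4, t01 = 1, t10 = 2, t11 = 3;
      double b = t00 + t11;              // trace
      double c = t00 * t11 - t01 * t10;  // determinant
      double disc = std::sqrt(b * b - 4 * c);
      double r1 = (b + disc) / 2;
      double r2 = (b - disc) / 2;
      assert(std::fabs(r1 - 5.0) < 1e-12 && std::fabs(r2 - 2.0) < 1e-12);
    }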
diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h
index 2ff8eaba3..27bbec8cd 100644
--- a/Eigen/src/Geometry/Scaling.h
+++ b/Eigen/src/Geometry/Scaling.h
@@ -72,8 +72,8 @@ public:
inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;
/** Concatenates a uniform scaling and an affine transformation */
- template<int Dim>
- inline Transform<Scalar,Dim> operator* (const Transform<Scalar,Dim>& t) const;
+ template<int Dim, int Mode>
+ inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim, Mode>& t) const;
/** Concatenates a uniform scaling and a linear transformation matrix */
// TODO returns an expression
@@ -156,4 +156,27 @@ typedef DiagonalMatrix<float, 3> AlignedScaling3f;
typedef DiagonalMatrix<double,3> AlignedScaling3d;
//@}
+template<typename Scalar>
+template<int Dim>
+inline Transform<Scalar,Dim>
+UniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const
+{
+ Transform<Scalar,Dim> res;
+ res.matrix().setZero();
+ res.linear().diagonal().fill(factor());
+ res.translation() = factor() * t.vector();
+ res(Dim,Dim) = Scalar(1);
+ return res;
+}
+
+template<typename Scalar>
+template<int Dim,int Mode>
+inline Transform<Scalar,Dim,Mode>
+UniformScaling<Scalar>::operator* (const Transform<Scalar,Dim, Mode>& t) const
+{
+ Transform<Scalar,Dim,Mode> res = t;
+ res.prescale(factor());
+ return res;
+}
+
#endif // EIGEN_SCALING_H
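A usage sketch for the newly templated operator: composing a uniform scaling with a transform whose Mode is spelled out explicitly (mode names as in the development tree of this period, so treat them as assumptions):

    #include <Eigen/Geometry>

    int main()
    {
      Eigen::Transform<float, 3, Eigen::Affine> t;
      t.setIdentity();
      t.translate(Eigen::Vector3f(1, 0, 0));
      // prescaling multiplies both the linear part and the translation by 2
      Eigen::Transform<float, 3, Eigen::Affine> st = Eigen::Scaling(2.0f) * t;
      return (st.translation().x() == 2.0f) ? 0 : 1;
    }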
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index fb6577f08..d0b70a31c 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -69,7 +69,7 @@ template<typename Derived> struct ei_determinant_impl<Derived, 2>
template<typename Derived> struct ei_determinant_impl<Derived, 3>
{
- static typename ei_traits<Derived>::Scalar run(const Derived& m)
+ static inline typename ei_traits<Derived>::Scalar run(const Derived& m)
{
return ei_bruteforce_det3_helper(m,0,1,2)
- ei_bruteforce_det3_helper(m,1,0,2)
@@ -100,8 +100,7 @@ inline typename ei_traits<Derived>::Scalar MatrixBase<Derived>::determinant() co
{
assert(rows() == cols());
typedef typename ei_nested<Derived,Base::RowsAtCompileTime>::type Nested;
- Nested nested(derived());
- return ei_determinant_impl<typename ei_cleantype<Nested>::type>::run(nested);
+ return ei_determinant_impl<typename ei_cleantype<Nested>::type>::run(derived());
}
#endif // EIGEN_DETERMINANT_H
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 9afc448cc..dea6ec41c 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -119,7 +119,7 @@ template<typename _MatrixType> class FullPivLU
* diagonal coefficient of U.
*/
RealScalar maxPivot() const { return m_maxpivot; }
-
+
/** \returns the permutation matrix P
*
* \sa permutationQ()
@@ -361,6 +361,8 @@ template<typename _MatrixType> class FullPivLU
(*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
}
+ MatrixType reconstructedMatrix() const;
+
inline int rows() const { return m_lu.rows(); }
inline int cols() const { return m_lu.cols(); }
@@ -404,6 +406,7 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0);
+ RealScalar cutoff(0);
for(int k = 0; k < size; ++k)
{
@@ -418,8 +421,14 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
col_of_biggest_in_corner += k; // need to add k to them.
- // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
- if(biggest_in_corner == RealScalar(0))
+ // when k==0, biggest_in_corner is the largest absolute value over all coefficients of the original matrix
+ if(k == 0) cutoff = biggest_in_corner * NumTraits<Scalar>::epsilon();
+
+ // if the pivot (hence the corner) is "zero", terminate to avoid generating nan/inf values.
+ // Notice that using an exact comparison (biggest_in_corner==0) here, as Golub-van Loan do in
+ // their pseudo-code, results in numerical instability! The cutoff here has been validated
+ // by running the unit test 'lu' with many repetitions.
+ if(biggest_in_corner < cutoff)
{
// before exiting, make sure to initialize the still uninitialized transpositions
// in a sane state without destroying what we already have.
@@ -480,6 +489,31 @@ typename ei_traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() cons
return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
}
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U Q^{-1}.
+ * This function is provided for debug purposes. */
+template<typename MatrixType>
+MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
+{
+ ei_assert(m_isInitialized && "LU is not initialized.");
+ const int smalldim = std::min(m_lu.rows(), m_lu.cols());
+ // LU
+ MatrixType res(m_lu.rows(),m_lu.cols());
+ // FIXME the .toDenseMatrix() should not be needed...
+ res = m_lu.corner(TopLeft,m_lu.rows(),smalldim)
+ .template triangularView<UnitLower>().toDenseMatrix()
+ * m_lu.corner(TopLeft,smalldim,m_lu.cols())
+ .template triangularView<Upper>().toDenseMatrix();
+
+ // P^{-1}(LU)
+ res = m_p.inverse() * res;
+
+ // (P^{-1}LU)Q^{-1}
+ res = res * m_q.inverse();
+
+ return res;
+}
+
/********* Implementation of kernel() **************************************************/
template<typename _MatrixType>
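Together with the relative pivot cutoff above (pivots below epsilon times the initial biggest_in_corner are treated as zero), the new reconstructedMatrix() makes the decomposition's defining identity directly checkable. A minimal test sketch, assuming this revision's API:

    using namespace Eigen;

    MatrixXd m = MatrixXd::Random(5,5);
    FullPivLU<MatrixXd> lu(m);
    // P^{-1} L U Q^{-1} must reproduce the input up to rounding
    assert(lu.reconstructedMatrix().isApprox(m));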
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index e20da70d6..116a614e1 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -123,7 +123,7 @@ struct ei_compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
****************************/
template<typename MatrixType, typename ResultType>
-void ei_compute_inverse_size3_helper(
+inline void ei_compute_inverse_size3_helper(
const MatrixType& matrix,
const typename ResultType::Scalar& invdet,
const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index ed2354d78..df36cb04d 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -165,6 +165,8 @@ template<typename _MatrixType> class PartialPivLU
*/
typename ei_traits<MatrixType>::Scalar determinant() const;
+ MatrixType reconstructedMatrix() const;
+
inline int rows() const { return m_lu.rows(); }
inline int cols() const { return m_lu.cols(); }
@@ -194,7 +196,7 @@ PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix)
compute(matrix);
}
-/** This is the blocked version of ei_fullpivlu_unblocked() */
+/** \internal This is the blocked version of ei_fullpivlu_unblocked() */
template<typename Scalar, int StorageOrder>
struct ei_partial_lu_impl
{
@@ -368,7 +370,7 @@ void ei_partial_lu_inplace(MatrixType& lu, IntVector& row_transpositions, int& n
ei_partial_lu_impl
<typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor>
- ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.stride(), &row_transpositions.coeffRef(0), nb_transpositions);
+ ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
}
template<typename MatrixType>
@@ -400,6 +402,23 @@ typename ei_traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() c
return Scalar(m_det_p) * m_lu.diagonal().prod();
}
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U.
+ * This function is provided for debug purposes. */
+template<typename MatrixType>
+MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
+{
+ ei_assert(m_isInitialized && "LU is not initialized.");
+ // LU
+ MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
+ * m_lu.template triangularView<Upper>();
+
+ // P^{-1}(LU)
+ res = m_p.inverse() * res;
+
+ return res;
+}
+
/***** Implementation of solve() *****************************************************/
template<typename _MatrixType, typename Rhs>
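The stride() to outerStride() rename in ei_partial_lu_inplace above (and in the sparse backends below) refers to the distance between consecutive outer vectors, i.e. between columns of a column-major matrix: the BLAS-style leading dimension. A sketch, assuming this revision's dense API:

    using namespace Eigen;

    MatrixXd m(4,3);                    // column-major by default
    int lda = m.outerStride();          // == 4 == m.rows() for a plain matrix
    double* col1 = m.data() + 1 * lda;  // first entry of column 1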
diff --git a/Eigen/src/Sparse/CholmodSupport.h b/Eigen/src/Sparse/CholmodSupport.h
index fd33b1507..fbd035ce4 100644
--- a/Eigen/src/Sparse/CholmodSupport.h
+++ b/Eigen/src/Sparse/CholmodSupport.h
@@ -99,7 +99,7 @@ cholmod_dense ei_cholmod_map_eigen_to_dense(MatrixBase<Derived>& mat)
res.nrow = mat.rows();
res.ncol = mat.cols();
res.nzmax = res.nrow * res.ncol;
- res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().stride();
+ res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride();
res.x = mat.derived().data();
res.z = 0;
@@ -233,7 +233,7 @@ bool SparseLLT<MatrixType,Cholmod>::solveInPlace(MatrixBase<Derived> &b) const
cholmod_dense* x = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &cdb, &m_cholmod);
if(!x)
{
- std::cerr << "Eigen: cholmod_solve failed\n";
+ //std::cerr << "Eigen: cholmod_solve failed\n";
return false;
}
b = Matrix<typename Base::Scalar,Dynamic,1>::Map(reinterpret_cast<typename Base::Scalar*>(x->x),b.rows());
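The solve path above copies CHOLMOD's raw result back into the right-hand side through Map. The same pattern in isolation, with 'raw' standing in for the x->x buffer returned by cholmod_solve (a hedged sketch, not library code):

    using namespace Eigen;

    double raw[4] = { 1., 2., 3., 4. };  // external buffer, e.g. from cholmod_solve
    VectorXd b(4);
    b = VectorXd::Map(raw, 4);           // wrap the buffer, then copy it into b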
diff --git a/Eigen/src/Sparse/DynamicSparseMatrix.h b/Eigen/src/Sparse/DynamicSparseMatrix.h
index 2594ffebc..d73dce229 100644
--- a/Eigen/src/Sparse/DynamicSparseMatrix.h
+++ b/Eigen/src/Sparse/DynamicSparseMatrix.h
@@ -236,7 +236,7 @@ class DynamicSparseMatrix
{
// remove all coefficients with innerCoord>=innerSize
// TODO
- std::cerr << "not implemented yet\n";
+ //std::cerr << "not implemented yet\n";
exit(2);
}
if (m_data.size() != outerSize)
diff --git a/Eigen/src/Sparse/SuperLUSupport.h b/Eigen/src/Sparse/SuperLUSupport.h
index 1a765c75b..18a967539 100644
--- a/Eigen/src/Sparse/SuperLUSupport.h
+++ b/Eigen/src/Sparse/SuperLUSupport.h
@@ -161,7 +161,7 @@ struct SluMatrix : SuperMatrix
res.nrow = mat.rows();
res.ncol = mat.cols();
- res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.stride();
+ res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.outerStride();
res.storage.values = mat.data();
return res;
}
@@ -217,7 +217,7 @@ struct SluMatrixMapHelper<Matrix<Scalar,Rows,Cols,Options,MRows,MCols> >
res.nrow = mat.rows();
res.ncol = mat.cols();
- res.storage.lda = mat.stride();
+ res.storage.lda = mat.outerStride();
res.storage.values = mat.data();
}
};
@@ -397,7 +397,7 @@ void SparseLU<MatrixType,SuperLU>::compute(const MatrixType& a)
case MinimumDegree_ATA : m_sluOptions.ColPerm = MMD_ATA; break;
case ColApproxMinimumDegree : m_sluOptions.ColPerm = COLAMD; break;
default:
- std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n";
+ //std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n";
m_sluOptions.ColPerm = NATURAL;
};
@@ -448,7 +448,7 @@ void SparseLU<MatrixType,SuperLU>::compute(const MatrixType& a)
&recip_pivot_gross, &rcond,
&m_sluStat, &info, Scalar());
#else
- std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
+ //std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
Base::m_succeeded = false;
return;
#endif
@@ -486,7 +486,7 @@ bool SparseLU<MatrixType,SuperLU>::solve(const MatrixBase<BDerived> &b,
case SvTranspose : m_sluOptions.Trans = TRANS; break;
case SvAdjoint : m_sluOptions.Trans = CONJ; break;
default:
- std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n";
+ //std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n";
m_sluOptions.Trans = NOTRANS;
}
@@ -513,7 +513,7 @@ bool SparseLU<MatrixType,SuperLU>::solve(const MatrixBase<BDerived> &b,
&recip_pivot_gross, &rcond,
&m_sluStat, &info, Scalar());
#else
- std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
+ //std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
return false;
#endif
}